| 1 | /* |
|---|
| 2 | * $Id: Java2Html.java 1238 2008-11-03 12:37:53Z amandel $ |
|---|
| 3 | * |
|---|
| 4 | * Copyright 2006, The jCoderZ.org Project. All rights reserved. |
|---|
| 5 | * |
|---|
| 6 | * Redistribution and use in source and binary forms, with or without |
|---|
| 7 | * modification, are permitted provided that the following conditions are |
|---|
| 8 | * met: |
|---|
| 9 | * |
|---|
| 10 | * * Redistributions of source code must retain the above copyright |
|---|
| 11 | * notice, this list of conditions and the following disclaimer. |
|---|
| 12 | * * Redistributions in binary form must reproduce the above |
|---|
| 13 | * copyright notice, this list of conditions and the following |
|---|
| 14 | * disclaimer in the documentation and/or other materials |
|---|
| 15 | * provided with the distribution. |
|---|
| 16 | * * Neither the name of the jCoderZ.org Project nor the names of |
|---|
| 17 | * its contributors may be used to endorse or promote products |
|---|
| 18 | * derived from this software without specific prior written |
|---|
| 19 | * permission. |
|---|
| 20 | * |
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND |
|---|
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|---|
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|---|
| 24 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS |
|---|
| 25 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|---|
| 26 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|---|
| 27 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
|---|
| 28 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|---|
| 30 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|---|
| 31 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 | */ |
|---|
| 33 | package org.jcoderz.phoenix.report; |
|---|
| 34 | |
|---|
| 35 | import java.io.BufferedReader; |
|---|
| 36 | import java.io.File; |
|---|
| 37 | import java.io.FileInputStream; |
|---|
| 38 | import java.io.IOException; |
|---|
| 39 | import java.io.InputStreamReader; |
|---|
| 40 | import java.io.LineNumberReader; |
|---|
| 41 | import java.io.Reader; |
|---|
| 42 | import java.io.StringReader; |
|---|
| 43 | import java.nio.charset.Charset; |
|---|
| 44 | import java.util.logging.Logger; |
|---|
| 45 | |
|---|
| 46 | import javax.swing.text.Segment; |
|---|
| 47 | |
|---|
| 48 | import org.gjt.sp.jedit.Mode; |
|---|
| 49 | import org.gjt.sp.jedit.syntax.DefaultTokenHandler; |
|---|
| 50 | import org.gjt.sp.jedit.syntax.ModeProvider; |
|---|
| 51 | import org.gjt.sp.jedit.syntax.ParserRuleSet; |
|---|
| 52 | import org.gjt.sp.jedit.syntax.Token; |
|---|
| 53 | import org.gjt.sp.jedit.syntax.TokenMarker; |
|---|
| 54 | import org.gjt.sp.jedit.syntax.TokenMarker.LineContext; |
|---|
| 55 | import org.jcoderz.commons.util.Assert; |
|---|
| 56 | import org.jcoderz.commons.util.IoUtil; |
|---|
| 57 | |
|---|
| 58 | |
|---|
| 59 | /** |
|---|
| 60 | * Splits an input file into several tokens suitable for syntax |
|---|
| 61 | * highlighting. |
|---|
| 62 | * This class encapsulates the access to the jEdit syntax |
|---|
| 63 | * highlighter package. No jEdit related classes should be |
|---|
| 64 | * passed by interfaces of this class. |
|---|
| 65 | * |
|---|
| 66 | * @author Andreas Mandel |
|---|
| 67 | */ |
|---|
| 68 | public class Syntax |
|---|
| 69 | { |
|---|
| 70 | private static final int MAX_RATIO_ILLEGAL_CHARACTERS = 10; |
|---|
| 71 | private static final int MAX_AVERAGE_LINE_LENGTH = 200; |
|---|
| 72 | private static final int BINARY_TEST_PROBE_CHARACTERS = 1024; |
|---|
| 73 | private static final String CLASSNAME = Syntax.class.getName(); |
|---|
| 74 | private static final Logger LOGGER = Logger.getLogger(CLASSNAME); |
|---|
| 75 | |
|---|
| 76 | private final Charset mSourceCharset; |
|---|
| 77 | private final int mTabWidth; |
|---|
| 78 | private final char[] mFileContent; |
|---|
| 79 | private int mFileContentPos; |
|---|
| 80 | // CHECKME: ate tabs in the token counted to the length? |
|---|
| 81 | private Token mToken = emptyToken(); |
|---|
| 82 | private int mCurrentLineNumber; |
|---|
| 83 | private int mCurrentLinePos; |
|---|
| 84 | private Segment mCurrentLine; |
|---|
| 85 | private int mNumberOfLines; |
|---|
| 86 | |
|---|
| 87 | private final TokenMarker mTokenMarker; |
|---|
| 88 | private final DefaultTokenHandler mTokenHandler |
|---|
| 89 | = new DefaultTokenHandler(); |
|---|
| 90 | private LineContext mLineContext = null; |
|---|
| 91 | /** First line of the file. */ |
|---|
| 92 | private String mFirstLine; |
|---|
| 93 | |
|---|
| 94 | static |
|---|
| 95 | { |
|---|
| 96 | SyntaxModeCatalogHandler.loadModes(); |
|---|
| 97 | } |
|---|
| 98 | |
|---|
| 99 | /** |
|---|
| 100 | * Initializes an Ascii2Html instance. |
|---|
| 101 | * |
|---|
| 102 | * @param in the source file to read. |
|---|
| 103 | * @param charSet the char set to use when reading the source file. |
|---|
| 104 | * If null the platform default char set will be used. |
|---|
| 105 | * @param tabWidth the tab width to use when calculating the cursor |
|---|
| 106 | * position. |
|---|
| 107 | * @throws IOException if a error occurs while reading the source file. |
|---|
| 108 | */ |
|---|
| 109 | public Syntax (File in, Charset charSet, int tabWidth) |
|---|
| 110 | throws IOException |
|---|
| 111 | { |
|---|
| 112 | Assert.notNull(in, "in"); |
|---|
| 113 | mSourceCharset = charSet == null ? Charset.defaultCharset() : charSet; |
|---|
| 114 | mTabWidth = tabWidth; |
|---|
| 115 | mFileContent = readFile(in).toCharArray(); |
|---|
| 116 | mFileContentPos = 0; |
|---|
| 117 | mCurrentLineNumber = 0; |
|---|
| 118 | mCurrentLine = null; |
|---|
| 119 | final Mode mode |
|---|
| 120 | = ModeProvider.instance.getModeForFile(in.getName(), mFirstLine); |
|---|
| 121 | if (mode == null) |
|---|
| 122 | { |
|---|
| 123 | if (isBinary(in.getAbsolutePath(), mFileContent)) |
|---|
| 124 | { |
|---|
| 125 | throw new RuntimeException("No html view for binary file '" |
|---|
| 126 | + in.getAbsolutePath() + "'."); |
|---|
| 127 | } |
|---|
| 128 | |
|---|
| 129 | LOGGER.fine("Could not find mode file for '" + in.getName() |
|---|
| 130 | + "'. Is the jedit-syntax.jar on the classpath?"); |
|---|
| 131 | mTokenMarker = new TokenMarker(); |
|---|
| 132 | mTokenMarker.addRuleSet(new ParserRuleSet("text", "MAIN")); |
|---|
| 133 | } |
|---|
| 134 | else |
|---|
| 135 | { |
|---|
| 136 | mTokenMarker = mode.getTokenMarker(); |
|---|
| 137 | } |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | /** |
|---|
| 141 | * Returns the number of lines of the parsed file. |
|---|
| 142 | * The value is available after creation of the class. |
|---|
| 143 | * @return the number of lines of the parsed file. |
|---|
| 144 | */ |
|---|
| 145 | public int getNumberOfLines () |
|---|
| 146 | { |
|---|
| 147 | return mNumberOfLines; |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | /** |
|---|
| 151 | * The line number of the currently parsed token. |
|---|
| 152 | * Counting starts with line 1. Nevertheless prior the first call |
|---|
| 153 | * to {@link #nextToken()} 0 is returned. |
|---|
| 154 | * @return the line number of the currently parsed token. |
|---|
| 155 | */ |
|---|
| 156 | public int getCurrentLineNumber () |
|---|
| 157 | { |
|---|
| 158 | return mCurrentLineNumber; |
|---|
| 159 | } |
|---|
| 160 | |
|---|
| 161 | /** |
|---|
| 162 | * Returns the cursor position of start of the current token. |
|---|
| 163 | * @return the cursor position of start of the current token. |
|---|
| 164 | */ |
|---|
| 165 | public int getCurrentLinePos () |
|---|
| 166 | { |
|---|
| 167 | return mCurrentLinePos; |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | /** |
|---|
| 171 | * Returns the current token type as string. |
|---|
| 172 | * To be used as symbolic identifier of the token. Possible |
|---|
| 173 | * return values can be fount in {@link Token#tokenToString(byte)}. |
|---|
| 174 | * For the {@link Token#END} null is returned. |
|---|
| 175 | * @return the current token type as string. |
|---|
| 176 | */ |
|---|
| 177 | public String getCurrentTokenType () |
|---|
| 178 | { |
|---|
| 179 | final String result; |
|---|
| 180 | if (mToken.id == Token.END) |
|---|
| 181 | { |
|---|
| 182 | result = null; |
|---|
| 183 | } |
|---|
| 184 | else |
|---|
| 185 | { |
|---|
| 186 | result = Token.tokenToString(mToken.id); |
|---|
| 187 | } |
|---|
| 188 | return result; |
|---|
| 189 | } |
|---|
| 190 | |
|---|
| 191 | /** |
|---|
| 192 | * Returns the length of the current reported token. |
|---|
| 193 | * @return the length of the current reported token. |
|---|
| 194 | */ |
|---|
| 195 | public int getCurrentTokenLength () |
|---|
| 196 | { |
|---|
| 197 | return mToken.length; |
|---|
| 198 | } |
|---|
| 199 | |
|---|
| 200 | /** |
|---|
| 201 | * Parses the next token and returns its textual content as string. |
|---|
| 202 | * @return the textual content of the new token. |
|---|
| 203 | */ |
|---|
| 204 | public String nextToken () |
|---|
| 205 | { |
|---|
| 206 | if (mCurrentLine == null |
|---|
| 207 | || mToken.id == Token.END) |
|---|
| 208 | { |
|---|
| 209 | nextLine(); |
|---|
| 210 | } |
|---|
| 211 | else |
|---|
| 212 | { |
|---|
| 213 | mCurrentLinePos += mToken.length; |
|---|
| 214 | mToken = mToken.next; |
|---|
| 215 | } |
|---|
| 216 | final String result; |
|---|
| 217 | if (mCurrentLine.count == 0) |
|---|
| 218 | { |
|---|
| 219 | mToken = emptyToken(); |
|---|
| 220 | result = ""; |
|---|
| 221 | } |
|---|
| 222 | else |
|---|
| 223 | { |
|---|
| 224 | if (mToken != null) |
|---|
| 225 | { |
|---|
| 226 | result |
|---|
| 227 | = new String(mFileContent, |
|---|
| 228 | mCurrentLine.offset + mToken.offset, mToken.length); |
|---|
| 229 | } |
|---|
| 230 | else |
|---|
| 231 | { |
|---|
| 232 | result = ""; |
|---|
| 233 | mToken = emptyToken(); |
|---|
| 234 | } |
|---|
| 235 | } |
|---|
| 236 | return result; |
|---|
| 237 | } |
|---|
| 238 | |
|---|
| 239 | /** |
|---|
| 240 | * Forward to next line. Takes care for different line ending styles. |
|---|
| 241 | * Parsing for next line is started. |
|---|
| 242 | */ |
|---|
| 243 | private void nextLine () |
|---|
| 244 | { |
|---|
| 245 | if (mFileContentPos > mFileContent.length) |
|---|
| 246 | { |
|---|
| 247 | mCurrentLine = null; // END OF FILE |
|---|
| 248 | mCurrentLineNumber = mNumberOfLines + 1; |
|---|
| 249 | } |
|---|
| 250 | else |
|---|
| 251 | { |
|---|
| 252 | int pos = mFileContentPos; |
|---|
| 253 | while (pos < mFileContent.length |
|---|
| 254 | && mFileContent[pos] != '\n' |
|---|
| 255 | && mFileContent[pos] != '\r') |
|---|
| 256 | { |
|---|
| 257 | pos++; |
|---|
| 258 | } |
|---|
| 259 | final int currentLineEnd = pos; |
|---|
| 260 | if (pos < mFileContent.length |
|---|
| 261 | && (mFileContent[pos] == '\n' |
|---|
| 262 | || mFileContent[pos] == '\r')) |
|---|
| 263 | { |
|---|
| 264 | pos++; |
|---|
| 265 | } |
|---|
| 266 | if (pos < mFileContent.length |
|---|
| 267 | && mFileContent[pos - 1] != mFileContent[pos] |
|---|
| 268 | && (mFileContent[pos] == '\n' |
|---|
| 269 | || mFileContent[pos] == '\r')) |
|---|
| 270 | { |
|---|
| 271 | pos++; |
|---|
| 272 | } |
|---|
| 273 | mCurrentLine |
|---|
| 274 | = new Segment(mFileContent, |
|---|
| 275 | mFileContentPos, currentLineEnd - mFileContentPos); |
|---|
| 276 | mCurrentLineNumber++; |
|---|
| 277 | mFileContentPos = pos; |
|---|
| 278 | mCurrentLinePos = 1; |
|---|
| 279 | |
|---|
| 280 | if (mCurrentLine.count > 0) |
|---|
| 281 | { |
|---|
| 282 | mTokenHandler.init(); |
|---|
| 283 | mLineContext |
|---|
| 284 | = mTokenMarker.markTokens( |
|---|
| 285 | mLineContext, mTokenHandler, mCurrentLine); |
|---|
| 286 | mToken = mTokenHandler.getTokens(); |
|---|
| 287 | } |
|---|
| 288 | else |
|---|
| 289 | { |
|---|
| 290 | mToken = emptyToken(); |
|---|
| 291 | } |
|---|
| 292 | } |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | private String readFile (File in) |
|---|
| 296 | throws IOException |
|---|
| 297 | { |
|---|
| 298 | String result = ""; |
|---|
| 299 | final FileInputStream fis = new FileInputStream(in); |
|---|
| 300 | Reader reader = null; |
|---|
| 301 | LineNumberReader lnr = null; |
|---|
| 302 | try |
|---|
| 303 | { |
|---|
| 304 | reader = new InputStreamReader(fis, mSourceCharset); |
|---|
| 305 | lnr = new LineNumberReader(reader); |
|---|
| 306 | result = IoUtil.readFully(lnr); |
|---|
| 307 | mNumberOfLines = lnr.getLineNumber(); |
|---|
| 308 | mFirstLine |
|---|
| 309 | = new BufferedReader(new StringReader(result)).readLine(); |
|---|
| 310 | } |
|---|
| 311 | finally |
|---|
| 312 | { |
|---|
| 313 | IoUtil.close(lnr); |
|---|
| 314 | IoUtil.close(reader); |
|---|
| 315 | IoUtil.close(fis); |
|---|
| 316 | } |
|---|
| 317 | return result; |
|---|
| 318 | } |
|---|
| 319 | |
|---|
| 320 | private static Token emptyToken () |
|---|
| 321 | { |
|---|
| 322 | return new Token(Token.END, 0, 0, null); |
|---|
| 323 | } |
|---|
| 324 | |
|---|
| 325 | static boolean isBinary (String name, char[] fileContent) |
|---|
| 326 | { |
|---|
| 327 | int newLines = 0; |
|---|
| 328 | int chars = 0; |
|---|
| 329 | int illegal = 0; |
|---|
| 330 | int i; |
|---|
| 331 | for (i = 0; i < fileContent.length |
|---|
| 332 | && i < BINARY_TEST_PROBE_CHARACTERS; i++) |
|---|
| 333 | { |
|---|
| 334 | final char c = fileContent[i]; |
|---|
| 335 | if (c == '\n' || c == '\r') |
|---|
| 336 | { |
|---|
| 337 | newLines++; |
|---|
| 338 | } |
|---|
| 339 | else if (Character.isWhitespace(c)) |
|---|
| 340 | { |
|---|
| 341 | chars++; |
|---|
| 342 | } |
|---|
| 343 | else if (Character.isISOControl(c)) |
|---|
| 344 | { |
|---|
| 345 | illegal++; |
|---|
| 346 | } |
|---|
| 347 | else if (Character.isDefined(c)) |
|---|
| 348 | { |
|---|
| 349 | chars++; |
|---|
| 350 | } |
|---|
| 351 | else |
|---|
| 352 | { |
|---|
| 353 | illegal++; |
|---|
| 354 | } |
|---|
| 355 | } |
|---|
| 356 | boolean result = false; // assume a text file |
|---|
| 357 | // less than a new line per 200 characters |
|---|
| 358 | if (((newLines + 1) * MAX_AVERAGE_LINE_LENGTH) < i) |
|---|
| 359 | { |
|---|
| 360 | result = true; |
|---|
| 361 | } |
|---|
| 362 | // to many 'illegal' chars |
|---|
| 363 | else if (illegal * MAX_RATIO_ILLEGAL_CHARACTERS > chars) |
|---|
| 364 | { |
|---|
| 365 | result = true; |
|---|
| 366 | } |
|---|
| 367 | LOGGER.finest("For file " + name + " tested " + i + " chars with " |
|---|
| 368 | + newLines + " newlines, " + chars + " legal chars, " |
|---|
| 369 | + illegal + " illegal chars. -> " |
|---|
| 370 | + (result ? "isBinary" : "isNotBinary")); |
|---|
| 371 | return result; |
|---|
| 372 | } |
|---|
| 373 | |
|---|
| 374 | } |
|---|