root/trunk/src/java/org/jcoderz/phoenix/report/Syntax.java

Revision 1496, 11.9 kB (checked in by amandel, 3 years ago)

Take care for findings.

  • Property svn:mime-type set to text/plain
Line 
1/*
2 * $Id: Java2Html.java 1238 2008-11-03 12:37:53Z amandel $
3 *
4 * Copyright 2006, The jCoderZ.org Project. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 *    * Redistributions of source code must retain the above copyright
11 *      notice, this list of conditions and the following disclaimer.
12 *    * Redistributions in binary form must reproduce the above
13 *      copyright notice, this list of conditions and the following
14 *      disclaimer in the documentation and/or other materials
15 *      provided with the distribution.
16 *    * Neither the name of the jCoderZ.org Project nor the names of
17 *      its contributors may be used to endorse or promote products
18 *      derived from this software without specific prior written
19 *      permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
31 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33package org.jcoderz.phoenix.report;
34
35import java.io.BufferedReader;
36import java.io.File;
37import java.io.FileInputStream;
38import java.io.IOException;
39import java.io.InputStreamReader;
40import java.io.LineNumberReader;
41import java.io.Reader;
42import java.io.StringReader;
43import java.nio.charset.Charset;
44import java.util.logging.Logger;
45
46import javax.swing.text.Segment;
47
48import org.gjt.sp.jedit.Mode;
49import org.gjt.sp.jedit.syntax.DefaultTokenHandler;
50import org.gjt.sp.jedit.syntax.ModeProvider;
51import org.gjt.sp.jedit.syntax.ParserRuleSet;
52import org.gjt.sp.jedit.syntax.Token;
53import org.gjt.sp.jedit.syntax.TokenMarker;
54import org.gjt.sp.jedit.syntax.TokenMarker.LineContext;
55import org.jcoderz.commons.util.Assert;
56import org.jcoderz.commons.util.IoUtil;
57
58
59/**
60 * Splits an input file into several tokens suitable for syntax
61 * highlighting.
62 * This class encapsulates the access to the jEdit syntax
63 * highlighter package. No jEdit related classes should be
64 * passed by interfaces of this class.
65 *
66 * @author Andreas Mandel
67 */
68public class Syntax
69{
70    private static final int MAX_RATIO_ILLEGAL_CHARACTERS = 10;
71    private static final int MAX_AVERAGE_LINE_LENGTH = 200;
72    private static final int BINARY_TEST_PROBE_CHARACTERS = 1024;
73    private static final String CLASSNAME = Syntax.class.getName();
74    private static final Logger LOGGER = Logger.getLogger(CLASSNAME);
75   
76    private final Charset mSourceCharset;
77    private final int mTabWidth;
78    private final char[] mFileContent;
79    private int mFileContentPos;
80    // CHECKME: ate tabs in the token counted to the length?
81    private Token mToken = emptyToken();
82    private int mCurrentLineNumber;
83    private int mCurrentLinePos;
84    private Segment mCurrentLine;
85    private int mNumberOfLines;
86
87    private final TokenMarker mTokenMarker;
88    private final DefaultTokenHandler mTokenHandler
89        = new DefaultTokenHandler();
90    private LineContext mLineContext = null;
91    /** First line of the file. */
92    private String mFirstLine;
93   
94    static
95    {
96        SyntaxModeCatalogHandler.loadModes();
97    }
98
99    /**
100     * Initializes an Ascii2Html instance.
101     *
102     * @param in the source file to read.
103     * @param charSet the char set to use when reading the source file.
104     *   If null the platform default char set will be used. 
105     * @param tabWidth the tab width to use when calculating the cursor
106     *   position.
107     * @throws IOException if a error occurs while reading the source file.
108     */
109    public Syntax (File in, Charset charSet, int tabWidth)
110        throws IOException
111    {
112        Assert.notNull(in, "in");
113        mSourceCharset = charSet == null ? Charset.defaultCharset() : charSet;
114        mTabWidth = tabWidth;
115        mFileContent = readFile(in).toCharArray();
116        mFileContentPos = 0;
117        mCurrentLineNumber = 0;
118        mCurrentLine = null;
119        final Mode mode
120            = ModeProvider.instance.getModeForFile(in.getName(), mFirstLine);
121        if (mode == null)
122        {
123            if (isBinary(in.getAbsolutePath(), mFileContent))
124            {
125                throw new RuntimeException("No html view for binary file '"
126                    + in.getAbsolutePath() + "'.");
127            }
128           
129            LOGGER.fine("Could not find mode file for '" + in.getName()
130                + "'. Is the jedit-syntax.jar on the classpath?");
131            mTokenMarker = new TokenMarker();
132            mTokenMarker.addRuleSet(new ParserRuleSet("text", "MAIN"));
133        }
134        else
135        {
136            mTokenMarker = mode.getTokenMarker();
137        }
138    }
139
140    /**
141     * Returns the number of lines of the parsed file.
142     * The value is available after creation of the class.
143     * @return the number of lines of the parsed file.
144     */
145    public int getNumberOfLines ()
146    {
147        return mNumberOfLines;
148    }
149   
150    /**
151     * The line number of the currently parsed token.
152     * Counting starts with line 1. Nevertheless prior the first call
153     * to {@link #nextToken()} 0 is returned.
154     * @return the line number of the currently parsed token.
155     */
156    public int getCurrentLineNumber ()
157    {
158        return mCurrentLineNumber;
159    }
160   
161    /**
162     * Returns the cursor position of start of the current token.
163     * @return the cursor position of start of the current token.
164     */
165    public int getCurrentLinePos ()
166    {
167        return mCurrentLinePos;
168    }
169   
170    /**
171     * Returns the current token type as string.
172     * To be used as symbolic identifier of the token. Possible
173     * return values can be fount in {@link Token#tokenToString(byte)}.
174     * For the {@link Token#END} null is returned.
175     * @return the current token type as string.
176     */
177    public String getCurrentTokenType ()
178    {
179        final String result;
180        if (mToken.id == Token.END)
181        {
182            result = null;
183        }
184        else
185        {
186            result = Token.tokenToString(mToken.id);
187        }
188        return result;
189    }
190
191    /**
192     * Returns the length of the current reported token.
193     * @return the length of the current reported token.
194     */
195    public int getCurrentTokenLength ()
196    {
197        return mToken.length;
198    }
199
200    /**
201     * Parses the next token and returns its textual content as string.
202     * @return the textual content of the new token.
203     */
204    public String nextToken ()
205    {
206        if (mCurrentLine == null
207            || mToken.id == Token.END)
208        {
209            nextLine();
210        }
211        else
212        {
213            mCurrentLinePos += mToken.length;
214            mToken = mToken.next;
215        }
216        final String result;
217        if (mCurrentLine.count == 0)
218        {
219            mToken = emptyToken();
220            result = "";
221        }
222        else
223        {
224            if (mToken != null)
225            {
226                result
227                    = new String(mFileContent,
228                        mCurrentLine.offset + mToken.offset, mToken.length);
229            }
230            else
231            {
232                result = "";
233                mToken = emptyToken();
234            }
235        }
236        return result;
237    }
238   
239    /**
240     * Forward to next line. Takes care for different line ending styles.
241     * Parsing for next line is started.
242     */
243    private void nextLine ()
244    {
245        if (mFileContentPos > mFileContent.length)
246        {
247            mCurrentLine = null; // END OF FILE
248            mCurrentLineNumber = mNumberOfLines + 1;
249        }
250        else
251        {
252            int pos = mFileContentPos;
253            while (pos < mFileContent.length
254                && mFileContent[pos] != '\n'
255                && mFileContent[pos] != '\r')
256            {
257                pos++;
258            }
259            final int currentLineEnd = pos;
260            if (pos < mFileContent.length
261                && (mFileContent[pos] == '\n'
262                    || mFileContent[pos] == '\r'))
263            {
264                pos++;
265            }
266            if (pos < mFileContent.length
267                && mFileContent[pos - 1] != mFileContent[pos]
268                && (mFileContent[pos] == '\n'
269                    || mFileContent[pos] == '\r'))
270            {
271                pos++;
272            }
273            mCurrentLine
274                = new Segment(mFileContent,
275                    mFileContentPos, currentLineEnd - mFileContentPos);
276            mCurrentLineNumber++;
277            mFileContentPos = pos;
278            mCurrentLinePos = 1;
279           
280            if (mCurrentLine.count > 0)
281            {
282                mTokenHandler.init();
283                mLineContext
284                    = mTokenMarker.markTokens(
285                        mLineContext, mTokenHandler, mCurrentLine);
286                mToken = mTokenHandler.getTokens();
287            }
288            else
289            {
290                mToken = emptyToken();
291            }
292        }
293    }
294   
295    private String readFile (File in)
296        throws IOException
297    {
298        String result = "";
299        final FileInputStream fis = new FileInputStream(in);
300        Reader reader = null;
301        LineNumberReader lnr = null;
302        try
303        {   
304            reader = new InputStreamReader(fis, mSourceCharset);
305            lnr = new LineNumberReader(reader);
306            result = IoUtil.readFully(lnr);
307            mNumberOfLines = lnr.getLineNumber();
308            mFirstLine
309                = new BufferedReader(new StringReader(result)).readLine();
310        }
311        finally
312        {
313            IoUtil.close(lnr);
314            IoUtil.close(reader);
315            IoUtil.close(fis);
316        }
317        return result;
318    }
319   
320    private static Token emptyToken ()
321    {
322        return new Token(Token.END, 0, 0, null);
323    }
324
325    static boolean isBinary (String name, char[] fileContent)
326    {
327        int newLines = 0;
328        int chars = 0;
329        int illegal = 0;
330        int i;
331        for (i = 0; i < fileContent.length
332            && i < BINARY_TEST_PROBE_CHARACTERS; i++)
333        {
334            final char c = fileContent[i];
335            if (c == '\n' || c == '\r')
336            {
337                newLines++;
338            }
339            else if (Character.isWhitespace(c))
340            {
341                chars++;
342            }
343            else if (Character.isISOControl(c))
344            {
345                illegal++;
346            }
347            else if (Character.isDefined(c))
348            {
349                chars++;
350            }
351            else
352            {
353                illegal++;
354            }
355        }
356        boolean result = false; // assume a text file
357        // less than a new line per 200 characters
358        if (((newLines + 1) * MAX_AVERAGE_LINE_LENGTH) < i)
359        {
360            result = true;
361        }
362        // to many 'illegal' chars
363        else if (illegal * MAX_RATIO_ILLEGAL_CHARACTERS > chars)
364        {
365            result = true;
366        }
367        LOGGER.finest("For file " + name + " tested " + i + " chars with "
368            + newLines + " newlines, " + chars + " legal chars, "
369            + illegal + " illegal chars. -> "
370            + (result ? "isBinary" : "isNotBinary"));
371        return result;
372    }
373
374}
Note: See TracBrowser for help on using the browser.