Project Report: fawkez

Package summary: org.jcoderz.commons.doclet

org.jcoderz.commons.doclet.HtmlCleaner

Line | Hits | Note | Source
1  /*
2   * $Id: HtmlCleaner.java 1011 2008-06-16 17:57:36Z amandel $
3   *
4   * Copyright 2006, The jCoderZ.org Project. All rights reserved.
5   *
6   * Redistribution and use in source and binary forms, with or without
7   * modification, are permitted provided that the following conditions are
8   * met:
9   *
10   *    * Redistributions of source code must retain the above copyright
11   *      notice, this list of conditions and the following disclaimer.
12   *    * Redistributions in binary form must reproduce the above
13   *      copyright notice, this list of conditions and the following
14   *      disclaimer in the documentation and/or other materials
15   *      provided with the distribution.
16   *    * Neither the name of the jCoderZ.org Project nor the names of
17   *      its contributors may be used to endorse or promote products
18   *      derived from this software without specific prior written
19   *      permission.
20   *
21   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
22   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24   * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS
25   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28   * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
31   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32   */
33  package org.jcoderz.commons.doclet;
34  
35  import java.io.ByteArrayInputStream;
36  import java.io.ByteArrayOutputStream;
37  import java.io.InputStream;
38  import java.io.PrintWriter;
39  import java.io.StringWriter;
40  import java.util.logging.Level;
41  import java.util.logging.Logger;
42  
43  import org.w3c.tidy.Configuration;
44  import org.w3c.tidy.Tidy;
45  
46  /**
47   * This class provides an easy interface to jTidy to clean up
48   * html fragments as used within javadoc.
49   *
50   * @author Andreas Mandel
51   */
52100 public class HtmlCleaner
53  {
54     /** The full qualified name of this class. */
5575    private static final String CLASSNAME = HtmlCleaner.class.getName();
56  
57     /** The logger to use. */
58100    private static final Logger logger = Logger.getLogger(CLASSNAME);
59  
60     private static final String FIX_HEADER
61           = "<html><head><title>clean</title></head><body>";
62  
63     private static final String FIX_FOOTER
64           = "</body></html>";
65  
66100    private String mWarnings = "";
67100    private boolean mHasErrors = false;
68  
69     /**
70      * Converts the given HTML fragment string into wellformed xhtml.
71      * @param in the html fragment to be cleaned up.
72      * @return a cleaned up wellformed xhtml version of the in string.
73      */
74     public String clean (CharSequence in)
75     {
76100       if (logger.isLoggable(Level.FINER))
77        {
78100          logger.entering(CLASSNAME, "clean(CharSequence)", in);
79        }
80100       mHasErrors = false;
81100       final Tidy tidy = new Tidy();
82100       final String inData = FIX_HEADER + in + FIX_FOOTER;
83100       final StringWriter err = new StringWriter();
84100       String result = null;
85        try
86        {
87100          tidy.setCharEncoding(Configuration.UTF8);
88100          tidy.setMakeClean(true);
89100          tidy.setXmlOut(true);
90100          tidy.setRawOut(true);
91100          tidy.setNumEntities(true);
92100          tidy.setWraplen(0); // do not care about line length
93           // tidy.setOnlyErrors(true);
94100          tidy.setErrout(new PrintWriter(err));
95  
96100          final InputStream inStream = new ByteArrayInputStream(
97                 inData.getBytes("utf-8"));
98  
99100          final ByteArrayOutputStream out = new ByteArrayOutputStream();
100  
101100          tidy.parse(inStream, out);
102  
103100          final String resultString = new String(out.toByteArray(), "utf-8");
104  
105100          final int start = resultString.indexOf("<body>");
106100          final int end = resultString.lastIndexOf("</body>");
107  
108100          if (start != -1 && end != -1)
109           {
110100             result = resultString.substring(
111                    start + "<body>\n".length(), end).trim();
112           }
113           else
114           {
115100             result = "Invalid HTML could not be parsed.";
116           }
117  
118100          if (tidy.getParseWarnings() == 0 && tidy.getParseErrors() == 0)
119           {
120100             mWarnings = "";
121           }
122           else
123           {
124100             mWarnings = err.toString();
125           }
126100          mHasErrors = (tidy.getParseErrors() == 0);
127        }
1280       catch (Exception ex)
129        {
1300          result = "Invalid HTML could not be parsed.";
1310          err.write(result);
1320          err.write("Got exception:");
1330          err.write(ex.toString());
1340          ex.printStackTrace(new PrintWriter(err));
1350          mWarnings = err.toString();
1360          logger.log(Level.FINER,
137                 "Could not handle html fragment. '" + in + "'." , ex);
1380          mHasErrors = true;
139100       }
140100       if (logger.isLoggable(Level.FINER))
141        {
142100          logger.exiting(CLASSNAME, "clean(CharSequence)", result);
143        }
144100       return result;
145     }
146  
147     /**
148      * Returns the warnings encountered during last clean.
149      * @return the warnings encountered during last clean.
150      */
151     public String getWarnings ()
152     {
153100       return mWarnings;
154     }
155  
156 (1)   public boolean hasErrors ()
157     {
1580       return mHasErrors;
159     }
160  }
161  

Findings in this File

c (1) 156 : 4 Missing a Javadoc comment.