|
|||||||||
| Home >> All >> org >> w3c >> [ tidy overview ] | PREV CLASS NEXT CLASS | ||||||||
SUMMARY: JAVADOC | SOURCE | DOWNLOAD | NESTED | FIELD | CONSTR | METHOD |
DETAIL: FIELD | CONSTR | METHOD | ||||||||
org.w3c.tidy
Class Tidy

java.lang.Object
  org.w3c.tidy.Tidy
- All Implemented Interfaces:
- java.io.Serializable
- public class Tidy
- extends java.lang.Object
- implements java.io.Serializable
- extends java.lang.Object
HTML parser and pretty printer
(c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.java for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000
Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts Institute of Technology, Institut National de Recherche en Informatique et en Automatique, Keio University). All Rights Reserved.
Contributing Author(s):
Dave Raggett
Andy Quick (translation to Java)
The contributing author(s) would like to thank all those who helped with testing, bug fixes, and patience. This wouldn't have been possible without all of you.
COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be liable for any direct, indirect, special or consequential damages arising out of any use of the software or documentation, even if advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute this source code, or portions hereof, documentation and executables, for any purpose, without fee, subject to the following restrictions:
- The origin of this source code must not be misrepresented.
- Altered versions must be plainly marked as such and must not be misrepresented as being the original source.
- This Copyright notice may not be removed or altered from any source or altered source distribution.
The copyright holders and contributing author(s) specifically permit, without fee, and encourage the use of this source code as a component for supporting the Hypertext Markup Language in commercial products. If you use this source code in a product, acknowledgment is not required but would be appreciated.
- Version:
- 1.0, 1999/05/22, 1.0.1, 1999/05/29, 1.1, 1999/06/18 Java Bean, 1.2, 1999/07/10 Tidy Release 7 Jul 1999, 1.3, 1999/07/30 Tidy Release 26 Jul 1999, 1.4, 1999/09/04 DOM support, 1.5, 1999/10/23 Tidy Release 27 Sep 1999, 1.6, 1999/11/01 Tidy Release 22 Oct 1999, 1.7, 1999/12/06 Tidy Release 30 Nov 1999, 1.8, 2000/01/22 Tidy Release 13 Jan 2000, 1.9, 2000/06/03 Tidy Release 30 Apr 2000, 1.10, 2000/07/22 Tidy Release 8 Jul 2000, 1.11, 2000/08/16 Tidy Release 4 Aug 2000
| Field Summary | |
private Configuration |
configuration
|
private java.io.PrintWriter |
errout
|
private boolean |
initialized
|
private java.lang.String |
inputStreamName
|
private int |
parseErrors
|
private int |
parseWarnings
|
(package private) static long |
serialVersionUID
|
private java.io.PrintWriter |
stderr
|
| Constructor Summary | |
Tidy()
|
|
| Method Summary | |
static org.w3c.dom.Document |
createEmptyDocument()
Creates an empty DOM Document. |
java.lang.String |
getAltText()
|
boolean |
getBreakBeforeBR()
|
boolean |
getBurstSlides()
|
int |
getCharEncoding()
|
Configuration |
getConfiguration()
|
java.lang.String |
getDocType()
|
boolean |
getDropEmptyParas()
|
boolean |
getDropFontTags()
|
boolean |
getEmacs()
|
boolean |
getEncloseBlockText()
|
boolean |
getEncloseText()
|
java.lang.String |
getErrfile()
|
java.io.PrintWriter |
getErrout()
Errout - the error output stream |
boolean |
getFixBackslash()
|
boolean |
getFixComments()
|
boolean |
getHideEndTags()
|
boolean |
getIndentAttributes()
|
boolean |
getIndentContent()
|
java.lang.String |
getInputStreamName()
|
boolean |
getKeepFileTimes()
|
boolean |
getLiteralAttribs()
|
boolean |
getLogicalEmphasis()
|
boolean |
getMakeClean()
|
boolean |
getNumEntities()
|
boolean |
getOnlyErrors()
|
int |
getParseErrors()
ParseErrors - the number of errors that occurred in the most recent parse operation |
int |
getParseWarnings()
ParseWarnings - the number of warnings that occurred in the most recent parse operation |
boolean |
getQuiet()
|
boolean |
getQuoteAmpersand()
|
boolean |
getQuoteMarks()
|
boolean |
getQuoteNbsp()
|
boolean |
getRawOut()
|
boolean |
getShowWarnings()
|
java.lang.String |
getSlidestyle()
|
boolean |
getSmartIndent()
|
int |
getSpaces()
|
java.io.PrintWriter |
getStderr()
|
int |
getTabsize()
|
boolean |
getTidyMark()
|
boolean |
getUpperCaseAttrs()
|
boolean |
getUpperCaseTags()
|
boolean |
getWord2000()
|
boolean |
getWrapAsp()
|
boolean |
getWrapAttVals()
|
boolean |
getWrapJste()
|
int |
getWraplen()
|
boolean |
getWrapPhp()
|
boolean |
getWrapScriptlets()
|
boolean |
getWrapSection()
|
boolean |
getWriteback()
|
boolean |
getXHTML()
|
boolean |
getXmlOut()
|
boolean |
getXmlPi()
|
boolean |
getXmlPIs()
|
boolean |
getXmlSpace()
|
boolean |
getXmlTags()
|
private void |
init()
first time initialization which should precede reading the command line |
static void |
main(java.lang.String[] argv)
Command line interface to parser and pretty printer. |
Node |
parse(java.io.InputStream in,
java.io.OutputStream out)
Parses InputStream in and returns the root Node. |
private Node |
parse(java.io.InputStream in,
java.lang.String file,
java.io.OutputStream out)
Internal routine that actually does the parsing. |
org.w3c.dom.Document |
parseDOM(java.io.InputStream in,
java.io.OutputStream out)
Parses InputStream in and returns a DOM Document node. |
void |
pprint(org.w3c.dom.Document doc,
java.io.OutputStream out)
Pretty-prints a DOM Document. |
void |
setAltText(java.lang.String altText)
AltText - default text for alt attribute |
void |
setBreakBeforeBR(boolean BreakBeforeBR)
BreakBeforeBR - o/p newline before <br> or not? |
void |
setBurstSlides(boolean BurstSlides)
BurstSlides - create slides on each h2 element |
void |
setCharEncoding(int charencoding)
CharEncoding |
void |
setConfigurationFromFile(java.lang.String filename)
Sets the configuration from a configuration file. |
void |
setConfigurationFromProps(java.util.Properties props)
Sets the configuration from a properties object. |
void |
setDocType(java.lang.String doctype)
DocType - user specified doctype: omit | auto | strict | loose | fpi, where the fpi is a string similar to "-//ACME//DTD HTML 3.14159//EN". Note: for fpi include the double-quotes in the string. |
void |
setDropEmptyParas(boolean DropEmptyParas)
DropEmptyParas - discard empty p elements |
void |
setDropFontTags(boolean DropFontTags)
DropFontTags - discard presentation tags |
void |
setEmacs(boolean Emacs)
Emacs - if true format error output for GNU Emacs |
void |
setEncloseBlockText(boolean EncloseBlockText)
EncloseBlockText - if true text in blocks is wrapped in <p>'s |
void |
setEncloseText(boolean EncloseText)
EncloseText - if true text at body is wrapped in <p>'s |
void |
setErrfile(java.lang.String errfile)
Errfile - file name to write errors to |
void |
setErrout(java.io.PrintWriter errout)
|
void |
setFixBackslash(boolean FixBackslash)
FixBackslash - fix URLs by replacing \ with / |
void |
setFixComments(boolean FixComments)
FixComments - fix comments with adjacent hyphens |
void |
setHideEndTags(boolean HideEndTags)
HideEndTags - suppress optional end tags |
void |
setIndentAttributes(boolean IndentAttributes)
IndentAttributes - newline+indent before each attribute |
void |
setIndentContent(boolean IndentContent)
IndentContent - indent content of appropriate tags |
void |
setInputStreamName(java.lang.String name)
InputStreamName - the name of the input stream (printed in the header information). |
void |
setKeepFileTimes(boolean KeepFileTimes)
KeepFileTimes - if true last modified time is preserved; this is NOT supported at this time. |
void |
setLiteralAttribs(boolean LiteralAttribs)
LiteralAttribs - if true attributes may use newlines |
void |
setLogicalEmphasis(boolean LogicalEmphasis)
LogicalEmphasis - replace i by em and b by strong |
void |
setMakeClean(boolean MakeClean)
MakeClean - remove presentational clutter |
void |
setNumEntities(boolean NumEntities)
NumEntities - use numeric entities |
void |
setOnlyErrors(boolean OnlyErrors)
OnlyErrors - if true normal output is suppressed |
void |
setQuiet(boolean Quiet)
Quiet - no 'Parsing X', guessed DTD or summary |
void |
setQuoteAmpersand(boolean QuoteAmpersand)
QuoteAmpersand - output naked ampersand as &amp; |
void |
setQuoteMarks(boolean QuoteMarks)
QuoteMarks - output " marks as &quot; |
void |
setQuoteNbsp(boolean QuoteNbsp)
QuoteNbsp - output non-breaking space as entity |
void |
setRawOut(boolean RawOut)
RawOut - avoid mapping values > 127 to entities |
void |
setShowWarnings(boolean ShowWarnings)
ShowWarnings - however errors are always shown |
void |
setSlidestyle(java.lang.String slidestyle)
Slidestyle - style sheet for slides |
void |
setSmartIndent(boolean SmartIndent)
SmartIndent - does text/block level content affect indentation |
void |
setSpaces(int spaces)
Spaces - default indentation |
void |
setTabsize(int tabsize)
Tabsize |
void |
setTidyMark(boolean TidyMark)
TidyMark - add meta element indicating tidied doc |
void |
setUpperCaseAttrs(boolean UpperCaseAttrs)
UpperCaseAttrs - output attributes in upper not lower case |
void |
setUpperCaseTags(boolean UpperCaseTags)
UpperCaseTags - output tags in upper not lower case |
void |
setWord2000(boolean Word2000)
Word2000 - draconian cleaning for Word2000 |
void |
setWrapAsp(boolean WrapAsp)
WrapAsp - wrap within ASP pseudo elements |
void |
setWrapAttVals(boolean WrapAttVals)
WrapAttVals - wrap within attribute values |
void |
setWrapJste(boolean WrapJste)
WrapJste - wrap within JSTE pseudo elements |
void |
setWraplen(int wraplen)
Wraplen - default wrap margin |
void |
setWrapPhp(boolean WrapPhp)
WrapPhp - wrap within PHP pseudo elements |
void |
setWrapScriptlets(boolean WrapScriptlets)
WrapScriptlets - wrap within JavaScript string literals |
void |
setWrapSection(boolean WrapSection)
WrapSection - wrap within <![ ... ]> section tags |
void |
setWriteback(boolean writeback)
Writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream. |
void |
setXHTML(boolean xHTML)
XHTML - output extensible HTML |
void |
setXmlOut(boolean XmlOut)
XmlOut - create output as XML |
void |
setXmlPi(boolean XmlPi)
XmlPi - add <?xml?> for XML docs |
void |
setXmlPIs(boolean XmlPIs)
XmlPIs - if set to true PIs must end with ?> |
void |
setXmlSpace(boolean XmlSpace)
XmlSpace - if set to yes adds xml:space attr as needed |
void |
setXmlTags(boolean XmlTags)
XmlTags - treat input as XML |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
serialVersionUID
static final long serialVersionUID
- See Also:
- Constant Field Values
initialized
private boolean initialized
errout
private java.io.PrintWriter errout
stderr
private java.io.PrintWriter stderr
configuration
private Configuration configuration
inputStreamName
private java.lang.String inputStreamName
parseErrors
private int parseErrors
parseWarnings
private int parseWarnings
| Constructor Detail |
Tidy
public Tidy()
| Method Detail |
getConfiguration
public Configuration getConfiguration()
getStderr
public java.io.PrintWriter getStderr()
getParseErrors
public int getParseErrors()
- ParseErrors - the number of errors that occurred in the most
recent parse operation
getParseWarnings
public int getParseWarnings()
- ParseWarnings - the number of warnings that occurred in the most
recent parse operation
getErrout
public java.io.PrintWriter getErrout()
- Errout - the error output stream
setErrout
public void setErrout(java.io.PrintWriter errout)
setSpaces
public void setSpaces(int spaces)
- Spaces - default indentation
getSpaces
public int getSpaces()
setWraplen
public void setWraplen(int wraplen)
- Wraplen - default wrap margin
getWraplen
public int getWraplen()
setCharEncoding
public void setCharEncoding(int charencoding)
- CharEncoding
getCharEncoding
public int getCharEncoding()
setTabsize
public void setTabsize(int tabsize)
- Tabsize
getTabsize
public int getTabsize()
setErrfile
public void setErrfile(java.lang.String errfile)
- Errfile - file name to write errors to
getErrfile
public java.lang.String getErrfile()
setWriteback
public void setWriteback(boolean writeback)
- Writeback - if true then output tidied markup
NOTE: this property is ignored when parsing from an InputStream.
getWriteback
public boolean getWriteback()
setOnlyErrors
public void setOnlyErrors(boolean OnlyErrors)
- OnlyErrors - if true normal output is suppressed
getOnlyErrors
public boolean getOnlyErrors()
setShowWarnings
public void setShowWarnings(boolean ShowWarnings)
- ShowWarnings - however errors are always shown
getShowWarnings
public boolean getShowWarnings()
setQuiet
public void setQuiet(boolean Quiet)
- Quiet - no 'Parsing X', guessed DTD or summary
getQuiet
public boolean getQuiet()
setIndentContent
public void setIndentContent(boolean IndentContent)
- IndentContent - indent content of appropriate tags
getIndentContent
public boolean getIndentContent()
setSmartIndent
public void setSmartIndent(boolean SmartIndent)
- SmartIndent - does text/block level content affect indentation
getSmartIndent
public boolean getSmartIndent()
setHideEndTags
public void setHideEndTags(boolean HideEndTags)
- HideEndTags - suppress optional end tags
getHideEndTags
public boolean getHideEndTags()
setXmlTags
public void setXmlTags(boolean XmlTags)
- XmlTags - treat input as XML
getXmlTags
public boolean getXmlTags()
setXmlOut
public void setXmlOut(boolean XmlOut)
- XmlOut - create output as XML
getXmlOut
public boolean getXmlOut()
setXHTML
public void setXHTML(boolean xHTML)
- XHTML - output extensible HTML
getXHTML
public boolean getXHTML()
setRawOut
public void setRawOut(boolean RawOut)
- RawOut - avoid mapping values > 127 to entities
getRawOut
public boolean getRawOut()
setUpperCaseTags
public void setUpperCaseTags(boolean UpperCaseTags)
- UpperCaseTags - output tags in upper not lower case
getUpperCaseTags
public boolean getUpperCaseTags()
setUpperCaseAttrs
public void setUpperCaseAttrs(boolean UpperCaseAttrs)
- UpperCaseAttrs - output attributes in upper not lower case
getUpperCaseAttrs
public boolean getUpperCaseAttrs()
setMakeClean
public void setMakeClean(boolean MakeClean)
- MakeClean - remove presentational clutter
getMakeClean
public boolean getMakeClean()
setBreakBeforeBR
public void setBreakBeforeBR(boolean BreakBeforeBR)
- BreakBeforeBR - o/p newline before <br> or not?
getBreakBeforeBR
public boolean getBreakBeforeBR()
setBurstSlides
public void setBurstSlides(boolean BurstSlides)
- BurstSlides - create slides on each h2 element
getBurstSlides
public boolean getBurstSlides()
setNumEntities
public void setNumEntities(boolean NumEntities)
- NumEntities - use numeric entities
getNumEntities
public boolean getNumEntities()
setQuoteMarks
public void setQuoteMarks(boolean QuoteMarks)
- QuoteMarks - output " marks as &quot;
getQuoteMarks
public boolean getQuoteMarks()
setQuoteNbsp
public void setQuoteNbsp(boolean QuoteNbsp)
- QuoteNbsp - output non-breaking space as entity
getQuoteNbsp
public boolean getQuoteNbsp()
setQuoteAmpersand
public void setQuoteAmpersand(boolean QuoteAmpersand)
- QuoteAmpersand - output naked ampersand as &amp;
getQuoteAmpersand
public boolean getQuoteAmpersand()
setWrapAttVals
public void setWrapAttVals(boolean WrapAttVals)
- WrapAttVals - wrap within attribute values
getWrapAttVals
public boolean getWrapAttVals()
setWrapScriptlets
public void setWrapScriptlets(boolean WrapScriptlets)
- WrapScriptlets - wrap within JavaScript string literals
getWrapScriptlets
public boolean getWrapScriptlets()
setWrapSection
public void setWrapSection(boolean WrapSection)
- WrapSection - wrap within <![ ... ]> section tags
getWrapSection
public boolean getWrapSection()
setAltText
public void setAltText(java.lang.String altText)
- AltText - default text for alt attribute
getAltText
public java.lang.String getAltText()
setSlidestyle
public void setSlidestyle(java.lang.String slidestyle)
- Slidestyle - style sheet for slides
getSlidestyle
public java.lang.String getSlidestyle()
setXmlPi
public void setXmlPi(boolean XmlPi)
- XmlPi - add <?xml?> for XML docs
getXmlPi
public boolean getXmlPi()
setDropFontTags
public void setDropFontTags(boolean DropFontTags)
- DropFontTags - discard presentation tags
getDropFontTags
public boolean getDropFontTags()
setDropEmptyParas
public void setDropEmptyParas(boolean DropEmptyParas)
- DropEmptyParas - discard empty p elements
getDropEmptyParas
public boolean getDropEmptyParas()
setFixComments
public void setFixComments(boolean FixComments)
- FixComments - fix comments with adjacent hyphens
getFixComments
public boolean getFixComments()
setWrapAsp
public void setWrapAsp(boolean WrapAsp)
- WrapAsp - wrap within ASP pseudo elements
getWrapAsp
public boolean getWrapAsp()
setWrapJste
public void setWrapJste(boolean WrapJste)
- WrapJste - wrap within JSTE pseudo elements
getWrapJste
public boolean getWrapJste()
setWrapPhp
public void setWrapPhp(boolean WrapPhp)
- WrapPhp - wrap within PHP pseudo elements
getWrapPhp
public boolean getWrapPhp()
setFixBackslash
public void setFixBackslash(boolean FixBackslash)
- FixBackslash - fix URLs by replacing \ with /
getFixBackslash
public boolean getFixBackslash()
setIndentAttributes
public void setIndentAttributes(boolean IndentAttributes)
- IndentAttributes - newline+indent before each attribute
getIndentAttributes
public boolean getIndentAttributes()
setDocType
public void setDocType(java.lang.String doctype)
- DocType - user specified doctype
omit | auto | strict | loose | fpi
where the fpi is a string similar to
"-//ACME//DTD HTML 3.14159//EN"
Note: for fpi include the double-quotes in the string.
getDocType
public java.lang.String getDocType()
setLogicalEmphasis
public void setLogicalEmphasis(boolean LogicalEmphasis)
- LogicalEmphasis - replace i by em and b by strong
getLogicalEmphasis
public boolean getLogicalEmphasis()
setXmlPIs
public void setXmlPIs(boolean XmlPIs)
- XmlPIs - if set to true PIs must end with ?>
getXmlPIs
public boolean getXmlPIs()
setEncloseText
public void setEncloseText(boolean EncloseText)
- EncloseText - if true text at body is wrapped in <p>'s
getEncloseText
public boolean getEncloseText()
setEncloseBlockText
public void setEncloseBlockText(boolean EncloseBlockText)
- EncloseBlockText - if true text in blocks is wrapped in <p>'s
getEncloseBlockText
public boolean getEncloseBlockText()
setKeepFileTimes
public void setKeepFileTimes(boolean KeepFileTimes)
- KeepFileTimes - if true last modified time is preserved
this is NOT supported at this time.
getKeepFileTimes
public boolean getKeepFileTimes()
setWord2000
public void setWord2000(boolean Word2000)
- Word2000 - draconian cleaning for Word2000
getWord2000
public boolean getWord2000()
setTidyMark
public void setTidyMark(boolean TidyMark)
- TidyMark - add meta element indicating tidied doc
getTidyMark
public boolean getTidyMark()
setXmlSpace
public void setXmlSpace(boolean XmlSpace)
- XmlSpace - if set to yes adds xml:space attr as needed
getXmlSpace
public boolean getXmlSpace()
setEmacs
public void setEmacs(boolean Emacs)
- Emacs - if true format error output for GNU Emacs
getEmacs
public boolean getEmacs()
setLiteralAttribs
public void setLiteralAttribs(boolean LiteralAttribs)
- LiteralAttribs - if true attributes may use newlines
getLiteralAttribs
public boolean getLiteralAttribs()
setInputStreamName
public void setInputStreamName(java.lang.String name)
- InputStreamName - the name of the input stream (printed in the
header information).
getInputStreamName
public java.lang.String getInputStreamName()
setConfigurationFromFile
public void setConfigurationFromFile(java.lang.String filename)
- Sets the configuration from a configuration file.
setConfigurationFromProps
public void setConfigurationFromProps(java.util.Properties props)
- Sets the configuration from a properties object.
init
private void init()
- first time initialization which should
precede reading the command line
parse
public Node parse(java.io.InputStream in, java.io.OutputStream out)
- Parses InputStream in and returns the root Node.
If out is non-null, pretty prints to OutputStream out.
parse
private Node parse(java.io.InputStream in, java.lang.String file, java.io.OutputStream out) throws java.io.FileNotFoundException, java.io.IOException
- Internal routine that actually does the parsing. The caller
can pass either an InputStream or file name. If both are passed,
the file name is preferred.
parseDOM
public org.w3c.dom.Document parseDOM(java.io.InputStream in, java.io.OutputStream out)
- Parses InputStream in and returns a DOM Document node.
If out is non-null, pretty prints to OutputStream out.
createEmptyDocument
public static org.w3c.dom.Document createEmptyDocument()
- Creates an empty DOM Document.
pprint
public void pprint(org.w3c.dom.Document doc, java.io.OutputStream out)
- Pretty-prints a DOM Document.
main
public static void main(java.lang.String[] argv)
- Command line interface to parser and pretty printer.
|
|||||||||
| Home >> All >> org >> w3c >> [ tidy overview ] | PREV CLASS NEXT CLASS | ||||||||
SUMMARY: JAVADOC | SOURCE | DOWNLOAD | NESTED | FIELD | CONSTR | METHOD |
DETAIL: FIELD | CONSTR | METHOD | ||||||||
JAVADOC
org.w3c.tidy.Tidy