java.lang.Object
com.port80.html.tidy.Lexer
- public class Lexer
- extends java.lang.Object
|
Method Summary |
boolean |
addGenerator(Node root)
|
private short |
apparentVersion()
|
void |
append(char c)
|
void |
append(java.lang.String str)
|
boolean |
canPrune(Node element)
|
void |
changeChar(char c)
|
boolean |
checkDocTypeKeyWords(Node doctype)
|
AttVal |
cloneAttributes(AttVal attrs)
|
Node |
cloneNode(Node node)
|
(package private) void |
decLength(int n)
|
void |
deferDup()
Defer duplicates when entering a table or other
element where the inlines shouldn't be duplicated |
static boolean |
expectsContent(Node node)
|
private static boolean |
findBadSubString(java.lang.String s,
java.lang.String p,
int len)
|
(package private) short |
findGivenVersion(Node doctype)
|
boolean |
fixDocType(Node root)
|
private void |
fixHTMLNameSpace(Node root,
java.lang.String profile)
|
void |
fixId(Node node)
|
boolean |
fixXMLPI(Node root)
|
(package private) CharBuffer |
getBuffer()
|
char |
getChar(int i)
|
int |
getColumn()
|
java.lang.String |
getInputName()
|
int |
getLineNumber()
|
int |
getPosition()
|
Node |
getScript(Node container)
|
Lexer.LexerState |
getState()
|
static java.lang.String |
getString(CharBuffer buf,
int offset,
int len)
|
(package private) TagTable |
getTagTable()
|
Node |
getToken(int mode)
|
short |
HTMLVersion()
|
java.lang.String |
HTMLVersionName()
|
Node |
inferredTag(java.lang.String name)
|
private Lexer.LexerState |
initState()
|
int |
inlineDup(Node node)
|
private Node |
insertedToken()
|
private static boolean |
isJavaScript(Node node)
|
boolean |
isPushed(Node node)
|
static boolean |
isValidAttrName(java.lang.String attr)
|
int |
length()
|
static void |
main(java.lang.String[] args)
|
private static short |
MAP(char c)
|
private static void |
mapStr(java.lang.String str,
short code)
|
void |
markPosition()
|
Node |
newLineNode()
|
Node |
newNode(int type)
|
Node |
newNode(int type,
int start,
int end,
int srcstart)
|
Node |
newNode(int type,
int start,
int end,
int srcstart,
java.lang.String element)
|
Node |
newNode(int type,
int start,
int end,
int srcstart,
java.lang.String element,
AttVal attributes)
|
Node |
newTextNode(int start,
int end,
int srcstart,
int srcend)
|
private Node |
parseAttrAsp()
|
private java.lang.String |
parseAttribute(MutableBoolean isempty,
MutableObject asp,
MutableObject php)
|
private Node |
parseAttrPhp()
PHP is like ASP but is based upon XML
processing instructions, e.g. <?php ... ?> |
(package private) AttVal |
parseAttrs(MutableBoolean isempty)
|
(package private) void |
parseEntity(int mode)
No longer attempts to insert missing ';' for unknown
entities unless one was present already, since this
gives unexpected results. |
private char |
parseServerInstruction()
|
(package private) char |
parseTagName(char c)
Get tag name. |
private java.lang.String |
parseValue(java.lang.String name,
boolean foldCase,
MutableBoolean isempty,
MutableChar pdelim)
|
void |
popInline(Node node)
Pop inline tag stack. |
void |
pushInline(Node node)
|
void |
setInsertSpace(char c)
|
(package private) void |
setPosition(int line,
int column)
|
Lexer.LexerState |
setState(Lexer.LexerState state)
|
void |
setWasWhite(boolean b)
|
boolean |
setXHTMLDocType(Node root)
|
void |
ungetToken()
|
static int |
wstrcasecmp(java.lang.String s1,
java.lang.String s2)
|
static int |
wstrcaselexcmp(java.lang.String s1,
java.lang.String s2)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
NAME
private static final java.lang.String NAME
- See Also:
- Constant Field Values
CHECK
private static final boolean CHECK
- See Also:
- Constant Field Values
VERBOSE
private static boolean VERBOSE
voyager_loose
private static final java.lang.String voyager_loose
- See Also:
- Constant Field Values
voyager_strict
private static final java.lang.String voyager_strict
- See Also:
- Constant Field Values
voyager_frameset
private static final java.lang.String voyager_frameset
- See Also:
- Constant Field Values
XHTML_NAMESPACE
private static final java.lang.String XHTML_NAMESPACE
- See Also:
- Constant Field Values
W3CVersion
private static Lexer.W3CVersionInfo[] W3CVersion
DIGIT
private static final short DIGIT
- See Also:
- Constant Field Values
LETTER
private static final short LETTER
- See Also:
- Constant Field Values
NAMECHAR
private static final short NAMECHAR
- See Also:
- Constant Field Values
WHITE
private static final short WHITE
- See Also:
- Constant Field Values
NEWLINE
private static final short NEWLINE
- See Also:
- Constant Field Values
LOWERCASE
private static final short LOWERCASE
- See Also:
- Constant Field Values
UPPERCASE
private static final short UPPERCASE
- See Also:
- Constant Field Values
HEXDIGIT
private static final short HEXDIGIT
- See Also:
- Constant Field Values
MODE_IGNORE_WHITESPACE
public static final short MODE_IGNORE_WHITESPACE
- See Also:
- Constant Field Values
MODE_MIXED_CONTENT
public static final short MODE_MIXED_CONTENT
- See Also:
- Constant Field Values
MODE_PREFORMATTED
public static final short MODE_PREFORMATTED
- See Also:
- Constant Field Values
MODE_IGNORE_MARKUP
public static final short MODE_IGNORE_MARKUP
- See Also:
- Constant Field Values
MODE_JAVASCRIPT
public static final short MODE_JAVASCRIPT
- See Also:
- Constant Field Values
MODE_SCRIPT
public static final short MODE_SCRIPT
- See Also:
- Constant Field Values
MODE_ATTR_VALUE
public static final short MODE_ATTR_VALUE
- See Also:
- Constant Field Values
lexmap
private static short[] lexmap
iStream
public IHTMLReader iStream
badAccess
public short badAccess
badLayout
public short badLayout
badChars
public short badChars
badForm
public short badForm
warnings
public short warnings
errors
public short errors
excludeBlocks
public boolean excludeBlocks
exiled
public boolean exiled
isvoyager
public boolean isvoyager
versions
public short versions
doctype
public int doctype
badDoctype
public boolean badDoctype
fInputName
private java.lang.String fInputName
lineno
private int lineno
column
private int column
token
private Node token
pushed
private boolean pushed
fState
private Lexer.LexerState fState
lexbuf
private CharBuffer lexbuf
istack
public java.util.Stack istack
istackbase
public int istackbase
fSavedTextNode
public Node fSavedTextNode
insert
public int insert
configuration
public TidyConfiguration configuration
styles
public Style styles
fTagTable
private TagTable fTagTable
seenBodyEndTag
protected boolean seenBodyEndTag
fInsertSpace
private char fInsertSpace
fWasWhite
boolean fWasWhite
isEndWithLineBreak
boolean isEndWithLineBreak
- Text content is ended with a line separator.
isEndWithMultiLineBreak
boolean isEndWithMultiLineBreak
isStartWithLineBreak
boolean isStartWithLineBreak
isStartWithMultiLineBreak
boolean isStartWithMultiLineBreak
Lexer
public Lexer(IHTMLReader in,
java.lang.String inputname,
TidyConfiguration configuration)
mapStr
private static void mapStr(java.lang.String str,
short code)
initState
private Lexer.LexerState initState()
isValidAttrName
public static boolean isValidAttrName(java.lang.String attr)
getString
public static java.lang.String getString(CharBuffer buf,
int offset,
int len)
expectsContent
public static boolean expectsContent(Node node)
wstrcasecmp
public static int wstrcasecmp(java.lang.String s1,
java.lang.String s2)
wstrcaselexcmp
public static int wstrcaselexcmp(java.lang.String s1,
java.lang.String s2)
newNode
public Node newNode(int type)
newNode
public Node newNode(int type,
int start,
int end,
int srcstart)
newNode
public Node newNode(int type,
int start,
int end,
int srcstart,
java.lang.String element)
newNode
public Node newNode(int type,
int start,
int end,
int srcstart,
java.lang.String element,
AttVal attributes)
newTextNode
public Node newTextNode(int start,
int end,
int srcstart,
int srcend)
newLineNode
public Node newLineNode()
cloneNode
public Node cloneNode(Node node)
cloneAttributes
public AttVal cloneAttributes(AttVal attrs)
changeChar
public void changeChar(char c)
append
public final void append(char c)
append
public final void append(java.lang.String str)
decLength
final void decLength(int n)
getBuffer
final CharBuffer getBuffer()
getTagTable
final TagTable getTagTable()
setPosition
final void setPosition(int line,
int column)
getInputName
public java.lang.String getInputName()
setState
public Lexer.LexerState setState(Lexer.LexerState state)
getState
public Lexer.LexerState getState()
ungetToken
public void ungetToken()
getToken
public Node getToken(int mode)
getScript
public Node getScript(Node container)
pushInline
public void pushInline(Node node)
popInline
public void popInline(Node node)
- Pop inline tag stack.
isPushed
public boolean isPushed(Node node)
inlineDup
public int inlineDup(Node node)
HTMLVersion
public short HTMLVersion()
HTMLVersionName
public java.lang.String HTMLVersionName()
addGenerator
public boolean addGenerator(Node root)
checkDocTypeKeyWords
public boolean checkDocTypeKeyWords(Node doctype)
setXHTMLDocType
public boolean setXHTMLDocType(Node root)
fixDocType
public boolean fixDocType(Node root)
fixXMLPI
public boolean fixXMLPI(Node root)
inferredTag
public Node inferredTag(java.lang.String name)
fixId
public void fixId(Node node)
deferDup
public void deferDup()
- Defer duplicates when entering a table or other
element where the inlines shouldn't be duplicated
canPrune
public boolean canPrune(Node element)
length
public final int length()
getChar
public final char getChar(int i)
getLineNumber
public final int getLineNumber()
getColumn
public final int getColumn()
getPosition
public final int getPosition()
setInsertSpace
public final void setInsertSpace(char c)
setWasWhite
public final void setWasWhite(boolean b)
markPosition
public void markPosition()
findBadSubString
private static boolean findBadSubString(java.lang.String s,
java.lang.String p,
int len)
MAP
private static short MAP(char c)
isJavaScript
private static boolean isJavaScript(Node node)
findGivenVersion
short findGivenVersion(Node doctype)
fixHTMLNameSpace
private void fixHTMLNameSpace(Node root,
java.lang.String profile)
apparentVersion
private short apparentVersion()
parseTagName
char parseTagName(char c)
- Get tag name.
parseEntity
void parseEntity(int mode)
- No longer attempts to insert missing ';' for unknown
entities unless one was present already, since this
gives unexpected results.
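For example: <a href="something.htm?foo&bar&fred">
was tidied to: <a href="something.htm?foo&amp;bar;&amp;fred;">
rather than: <a href="something.htm?foo&amp;bar&amp;fred">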
My thanks to Maurice Buxton for spotting this.
parseAttrAsp
private Node parseAttrAsp()
parseAttrPhp
private Node parseAttrPhp()
- PHP is like ASP but is based upon XML
processing instructions, e.g. <?php ... ?>
parseAttribute
private java.lang.String parseAttribute(MutableBoolean isempty,
MutableObject asp,
MutableObject php)
parseServerInstruction
private char parseServerInstruction()
parseValue
private java.lang.String parseValue(java.lang.String name,
boolean foldCase,
MutableBoolean isempty,
MutableChar pdelim)
parseAttrs
AttVal parseAttrs(MutableBoolean isempty)
insertedToken
private Node insertedToken()
main
public static void main(java.lang.String[] args)