Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

com.port80.html.tidy
Class Lexer  view Lexer download Lexer.java

java.lang.Object
  extended by com.port80.html.tidy.Lexer

public class Lexer
extends java.lang.Object


Nested Class Summary
private static class Lexer.LexerASPState
           
private static class Lexer.LexerCDataState
           
private static class Lexer.LexerCommentState
           
private static class Lexer.LexerContentState
           
private static class Lexer.LexerDocTypeState
           
private static class Lexer.LexerEndTagState
           
private static class Lexer.LexerJSTEState
           
private static class Lexer.LexerPHPState
           
private static class Lexer.LexerProcInstState
           
private static class Lexer.LexerScriptState
           
private static class Lexer.LexerSectionState
           
private static class Lexer.LexerStartTagState
           
private static class Lexer.LexerState
           
private static class Lexer.LexerTagState
           
private static class Lexer.W3CVersionInfo
           
 
Field Summary
 short badAccess
           
 short badChars
           
 boolean badDoctype
           
 short badForm
           
 short badLayout
           
private static boolean CHECK
           
private  int column
           
 TidyConfiguration configuration
           
private static short DIGIT
           
 int doctype
           
 short errors
           
 boolean excludeBlocks
           
 boolean exiled
           
private  java.lang.String fInputName
           
private  char fInsertSpace
           
 Node fSavedTextNode
           
private  Lexer.LexerState fState
           
private  TagTable fTagTable
           
(package private)  boolean fWasWhite
           
private static short HEXDIGIT
           
 int insert
           
(package private)  boolean isEndWithLineBreak
          Text content ends with a line separator.
(package private)  boolean isEndWithMultiLineBreak
           
(package private)  boolean isStartWithLineBreak
           
(package private)  boolean isStartWithMultiLineBreak
           
 java.util.Stack istack
           
 int istackbase
           
 IHTMLReader iStream
           
 boolean isvoyager
           
private static short LETTER
           
private  CharBuffer lexbuf
           
private static short[] lexmap
           
private  int lineno
           
private static short LOWERCASE
           
static short MODE_ATTR_VALUE
           
static short MODE_IGNORE_MARKUP
           
static short MODE_IGNORE_WHITESPACE
           
static short MODE_JAVASCRIPT
           
static short MODE_MIXED_CONTENT
           
static short MODE_PREFORMATTED
           
static short MODE_SCRIPT
           
private static java.lang.String NAME
           
private static short NAMECHAR
           
private static short NEWLINE
           
private  boolean pushed
           
protected  boolean seenBodyEndTag
           
 Style styles
           
private  Node token
           
private static short UPPERCASE
           
private static boolean VERBOSE
           
 short versions
           
private static java.lang.String voyager_frameset
           
private static java.lang.String voyager_loose
           
private static java.lang.String voyager_strict
           
private static Lexer.W3CVersionInfo[] W3CVersion
           
 short warnings
           
private static short WHITE
           
private static java.lang.String XHTML_NAMESPACE
           
 
Constructor Summary
Lexer(IHTMLReader in, java.lang.String inputname, TidyConfiguration configuration)
           
 
Method Summary
 boolean addGenerator(Node root)
           
private  short apparentVersion()
           
 void append(char c)
           
 void append(java.lang.String str)
           
 boolean canPrune(Node element)
           
 void changeChar(char c)
           
 boolean checkDocTypeKeyWords(Node doctype)
           
 AttVal cloneAttributes(AttVal attrs)
           
 Node cloneNode(Node node)
           
(package private)  void decLength(int n)
           
 void deferDup()
          Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated.
static boolean expectsContent(Node node)
           
private static boolean findBadSubString(java.lang.String s, java.lang.String p, int len)
           
(package private)  short findGivenVersion(Node doctype)
           
 boolean fixDocType(Node root)
           
private  void fixHTMLNameSpace(Node root, java.lang.String profile)
           
 void fixId(Node node)
           
 boolean fixXMLPI(Node root)
           
(package private)  CharBuffer getBuffer()
           
 char getChar(int i)
           
 int getColumn()
           
 java.lang.String getInputName()
           
 int getLineNumber()
           
 int getPosition()
           
 Node getScript(Node container)
           
 Lexer.LexerState getState()
           
static java.lang.String getString(CharBuffer buf, int offset, int len)
           
(package private)  TagTable getTagTable()
           
 Node getToken(int mode)
           
 short HTMLVersion()
           
 java.lang.String HTMLVersionName()
           
 Node inferredTag(java.lang.String name)
           
private  Lexer.LexerState initState()
           
 int inlineDup(Node node)
           
private  Node insertedToken()
           
private static boolean isJavaScript(Node node)
           
 boolean isPushed(Node node)
           
static boolean isValidAttrName(java.lang.String attr)
           
 int length()
           
static void main(java.lang.String[] args)
           
private static short MAP(char c)
           
private static void mapStr(java.lang.String str, short code)
           
 void markPosition()
           
 Node newLineNode()
           
 Node newNode(int type)
           
 Node newNode(int type, int start, int end, int srcstart)
           
 Node newNode(int type, int start, int end, int srcstart, java.lang.String element)
           
 Node newNode(int type, int start, int end, int srcstart, java.lang.String element, AttVal attributes)
           
 Node newTextNode(int start, int end, int srcstart, int srcend)
           
private  Node parseAttrAsp()
           
private  java.lang.String parseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
           
private  Node parseAttrPhp()
          PHP is like ASP but is based upon XML processing instructions, e.g. <?php ... ?>
(package private)  AttVal parseAttrs(MutableBoolean isempty)
           
(package private)  void parseEntity(int mode)
          No longer attempts to insert a missing ';' for unknown entities unless one was already present, since this gives unexpected results.
private  char parseServerInstruction()
           
(package private)  char parseTagName(char c)
          Get tag name.
private  java.lang.String parseValue(java.lang.String name, boolean foldCase, MutableBoolean isempty, MutableChar pdelim)
           
 void popInline(Node node)
          Pop inline tag stack.
 void pushInline(Node node)
           
 void setInsertSpace(char c)
           
(package private)  void setPosition(int line, int column)
           
 Lexer.LexerState setState(Lexer.LexerState state)
           
 void setWasWhite(boolean b)
           
 boolean setXHTMLDocType(Node root)
           
 void ungetToken()
           
static int wstrcasecmp(java.lang.String s1, java.lang.String s2)
           
static int wstrcaselexcmp(java.lang.String s1, java.lang.String s2)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

NAME

private static final java.lang.String NAME
See Also:
Constant Field Values

CHECK

private static final boolean CHECK
See Also:
Constant Field Values

VERBOSE

private static boolean VERBOSE

voyager_loose

private static final java.lang.String voyager_loose
See Also:
Constant Field Values

voyager_strict

private static final java.lang.String voyager_strict
See Also:
Constant Field Values

voyager_frameset

private static final java.lang.String voyager_frameset
See Also:
Constant Field Values

XHTML_NAMESPACE

private static final java.lang.String XHTML_NAMESPACE
See Also:
Constant Field Values

W3CVersion

private static Lexer.W3CVersionInfo[] W3CVersion

DIGIT

private static final short DIGIT
See Also:
Constant Field Values

LETTER

private static final short LETTER
See Also:
Constant Field Values

NAMECHAR

private static final short NAMECHAR
See Also:
Constant Field Values

WHITE

private static final short WHITE
See Also:
Constant Field Values

NEWLINE

private static final short NEWLINE
See Also:
Constant Field Values

LOWERCASE

private static final short LOWERCASE
See Also:
Constant Field Values

UPPERCASE

private static final short UPPERCASE
See Also:
Constant Field Values

HEXDIGIT

private static final short HEXDIGIT
See Also:
Constant Field Values

MODE_IGNORE_WHITESPACE

public static final short MODE_IGNORE_WHITESPACE
See Also:
Constant Field Values

MODE_MIXED_CONTENT

public static final short MODE_MIXED_CONTENT
See Also:
Constant Field Values

MODE_PREFORMATTED

public static final short MODE_PREFORMATTED
See Also:
Constant Field Values

MODE_IGNORE_MARKUP

public static final short MODE_IGNORE_MARKUP
See Also:
Constant Field Values

MODE_JAVASCRIPT

public static final short MODE_JAVASCRIPT
See Also:
Constant Field Values

MODE_SCRIPT

public static final short MODE_SCRIPT
See Also:
Constant Field Values

MODE_ATTR_VALUE

public static final short MODE_ATTR_VALUE
See Also:
Constant Field Values

lexmap

private static short[] lexmap

iStream

public IHTMLReader iStream

badAccess

public short badAccess

badLayout

public short badLayout

badChars

public short badChars

badForm

public short badForm

warnings

public short warnings

errors

public short errors

excludeBlocks

public boolean excludeBlocks

exiled

public boolean exiled

isvoyager

public boolean isvoyager

versions

public short versions

doctype

public int doctype

badDoctype

public boolean badDoctype

fInputName

private java.lang.String fInputName

lineno

private int lineno

column

private int column

token

private Node token

pushed

private boolean pushed

fState

private Lexer.LexerState fState

lexbuf

private CharBuffer lexbuf

istack

public java.util.Stack istack

istackbase

public int istackbase

fSavedTextNode

public Node fSavedTextNode

insert

public int insert

configuration

public TidyConfiguration configuration

styles

public Style styles

fTagTable

private TagTable fTagTable

seenBodyEndTag

protected boolean seenBodyEndTag

fInsertSpace

private char fInsertSpace

fWasWhite

boolean fWasWhite

isEndWithLineBreak

boolean isEndWithLineBreak
Text content ends with a line separator.


isEndWithMultiLineBreak

boolean isEndWithMultiLineBreak

isStartWithLineBreak

boolean isStartWithLineBreak

isStartWithMultiLineBreak

boolean isStartWithMultiLineBreak
Constructor Detail

Lexer

public Lexer(IHTMLReader in,
             java.lang.String inputname,
             TidyConfiguration configuration)
Method Detail

mapStr

private static void mapStr(java.lang.String str,
                           short code)

initState

private Lexer.LexerState initState()

isValidAttrName

public static boolean isValidAttrName(java.lang.String attr)

getString

public static java.lang.String getString(CharBuffer buf,
                                         int offset,
                                         int len)

expectsContent

public static boolean expectsContent(Node node)

wstrcasecmp

public static int wstrcasecmp(java.lang.String s1,
                              java.lang.String s2)

wstrcaselexcmp

public static int wstrcaselexcmp(java.lang.String s1,
                                 java.lang.String s2)

newNode

public Node newNode(int type)

newNode

public Node newNode(int type,
                    int start,
                    int end,
                    int srcstart)

newNode

public Node newNode(int type,
                    int start,
                    int end,
                    int srcstart,
                    java.lang.String element)

newNode

public Node newNode(int type,
                    int start,
                    int end,
                    int srcstart,
                    java.lang.String element,
                    AttVal attributes)

newTextNode

public Node newTextNode(int start,
                        int end,
                        int srcstart,
                        int srcend)

newLineNode

public Node newLineNode()

cloneNode

public Node cloneNode(Node node)

cloneAttributes

public AttVal cloneAttributes(AttVal attrs)

changeChar

public void changeChar(char c)

append

public final void append(char c)

append

public final void append(java.lang.String str)

decLength

final void decLength(int n)

getBuffer

final CharBuffer getBuffer()

getTagTable

final TagTable getTagTable()

setPosition

final void setPosition(int line,
                       int column)

getInputName

public java.lang.String getInputName()

setState

public Lexer.LexerState setState(Lexer.LexerState state)

getState

public Lexer.LexerState getState()

ungetToken

public void ungetToken()

getToken

public Node getToken(int mode)

getScript

public Node getScript(Node container)

pushInline

public void pushInline(Node node)

popInline

public void popInline(Node node)
Pop inline tag stack.


isPushed

public boolean isPushed(Node node)

inlineDup

public int inlineDup(Node node)

HTMLVersion

public short HTMLVersion()

HTMLVersionName

public java.lang.String HTMLVersionName()

addGenerator

public boolean addGenerator(Node root)

checkDocTypeKeyWords

public boolean checkDocTypeKeyWords(Node doctype)

setXHTMLDocType

public boolean setXHTMLDocType(Node root)

fixDocType

public boolean fixDocType(Node root)

fixXMLPI

public boolean fixXMLPI(Node root)

inferredTag

public Node inferredTag(java.lang.String name)

fixId

public void fixId(Node node)

deferDup

public void deferDup()
Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated.


canPrune

public boolean canPrune(Node element)

length

public final int length()

getChar

public final char getChar(int i)

getLineNumber

public final int getLineNumber()

getColumn

public final int getColumn()

getPosition

public final int getPosition()

setInsertSpace

public final void setInsertSpace(char c)

setWasWhite

public final void setWasWhite(boolean b)

markPosition

public void markPosition()

findBadSubString

private static boolean findBadSubString(java.lang.String s,
                                        java.lang.String p,
                                        int len)

MAP

private static short MAP(char c)

isJavaScript

private static boolean isJavaScript(Node node)

findGivenVersion

short findGivenVersion(Node doctype)

fixHTMLNameSpace

private void fixHTMLNameSpace(Node root,
                              java.lang.String profile)

apparentVersion

private short apparentVersion()

parseTagName

char parseTagName(char c)
Get tag name.


parseEntity

void parseEntity(int mode)
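No longer attempts to insert a missing ';' for unknown entities unless one was already present, since this gives unexpected results. For example: <a href="something.htm?foo&bar&fred"> was tidied to: <a href="something.htm?foo&amp;bar;&amp;fred;"> rather than: <a href="something.htm?foo&amp;bar&amp;fred">. My thanks to Maurice Buxton for spotting this.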


parseAttrAsp

private Node parseAttrAsp()

parseAttrPhp

private Node parseAttrPhp()
PHP is like ASP but is based upon XML processing instructions, e.g. <?php ... ?>


parseAttribute

private java.lang.String parseAttribute(MutableBoolean isempty,
                                        MutableObject asp,
                                        MutableObject php)

parseServerInstruction

private char parseServerInstruction()

parseValue

private java.lang.String parseValue(java.lang.String name,
                                    boolean foldCase,
                                    MutableBoolean isempty,
                                    MutableChar pdelim)

parseAttrs

AttVal parseAttrs(MutableBoolean isempty)

insertedToken

private Node insertedToken()

main

public static void main(java.lang.String[] args)