java.lang.Object
com.port80.html.tidy.Clean
- public class Clean
- extends java.lang.Object
Filters from other formats such as Microsoft Word
often make excessive use of presentation markup such
as font tags, B, I, and the align attribute. By applying
a set of production rules, it is straightforward to
transform this to use CSS.
Some rules replace some of the children of an element by
style properties on the element, e.g.
<p><b>...</b></p>
->
<p style="font-weight: bold">...</p>
Such rules are applied to the element's content and then
to the element itself until none of the rules apply any more.
Having applied all the rules to an element, it will have
a style attribute with one or more properties.
Other rules strip the element they apply to, replacing
it by style properties on the contents, e.g.
<dir><li><p>...</li></dir>
->
<p style="margin-left: 1em">...
These rules are applied to an element before processing
its content and replace the current element by the first
element in the exposed content.
After applying both sets of rules, you can replace the
style attribute by a class value and style rule in the
document head. To support this, an association of styles
and class names is built.
A naive approach is to rely on string matching to test
when two property lists are the same. A better approach
would be to first sort the properties before matching.
|
Method Summary |
private void |
addAlign(Node node,
java.lang.String align)
|
private void |
addColorRule(Lexer lexer,
java.lang.String selector,
java.lang.String color)
|
private void |
addFontColor(Node node,
java.lang.String color)
|
private void |
addFontFace(Node node,
java.lang.String face)
|
private void |
addFontSize(Node node,
java.lang.String size)
|
private void |
addFontStyles(Node node,
AttVal av)
|
private java.lang.String |
addProperty(java.lang.String style,
java.lang.String property)
|
private void |
addStyleProperty(Node node,
java.lang.String property)
|
private boolean |
blockStyle(Lexer lexer,
Node node,
MutableObject pnode)
|
void |
bQ2Div(Node node)
|
private boolean |
center2Div(Lexer lexer,
Node node,
MutableObject pnode)
|
private void |
cleanBodyAttrs(Lexer lexer,
Node body)
|
private Node |
cleanNode(Lexer lexer,
Node node)
|
void |
cleanTree(Lexer lexer,
Node doc)
|
void |
cleanWord2000(Lexer lexer,
Node node)
|
private StyleProp |
createProps(StyleProp prop,
java.lang.String style)
|
private java.lang.String |
createPropString(StyleProp props)
|
private void |
createStyleElement(Lexer lexer,
Node doc)
|
private Node |
createStyleProperties(Lexer lexer,
Node node)
|
private void |
defineStyleRules(Lexer lexer,
Node node)
|
private boolean |
dir2Div(Lexer lexer,
Node node,
MutableObject pnode)
|
private Node |
discardContainer(Node element)
Discard the container 'element'. |
void |
dropSections(Lexer lexer,
Node node)
|
void |
emFromI(Node node)
|
private java.lang.String |
findStyle(Lexer lexer,
java.lang.String tag,
java.lang.String properties)
|
private void |
fixNodeLinks(Node node)
|
private boolean |
font2Span(Lexer lexer,
Node node,
MutableObject pnode)
|
private java.lang.String |
fontSize2Name(java.lang.String size)
|
private java.lang.String |
gensymClass(java.lang.String tag)
|
private boolean |
inlineStyle(Lexer lexer,
Node node,
MutableObject pnode)
|
private StyleProp |
insertProperty(StyleProp props,
java.lang.String name,
java.lang.String value)
|
boolean |
isWord2000(Node root,
TagTable tt)
|
void |
list2BQ(Node node)
|
private Node |
mergeContainers(Node element,
Node next)
Merge two consecutive containers, e.g. <b>...</b><b>...</b>. |
private boolean |
mergeDivs(Lexer lexer,
Node node,
MutableObject pnode)
|
private java.lang.String |
mergeProperties(java.lang.String s1,
java.lang.String s2)
|
private void |
mergeStyles(Node node,
Node child)
|
void |
nestedEmphasis(Node node)
Simplifies <b><b> ... </b> ... </b> etc. |
private boolean |
nestedList(Lexer lexer,
Node node,
MutableObject pnode)
|
private boolean |
niceBody(Lexer lexer,
Node doc)
|
private void |
normalizeSpaces(Lexer lexer,
Node node)
|
Node |
pruneSection(Lexer lexer,
Node node)
|
void |
purgeAttributes(Node node)
|
private void |
stripOnlyChild(Node node)
|
Node |
stripSpan(Lexer lexer,
Node span)
|
private void |
style2Rule(Lexer lexer,
Node node)
|
private void |
textAlign(Lexer lexer,
Node node)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
NAME
private static final java.lang.String NAME
- See Also:
- Constant Field Values
FIXME
private static final boolean FIXME
- See Also:
- Constant Field Values
classNum
private int classNum
tt
private TagTable tt
Clean
public Clean(TagTable tt)
insertProperty
private StyleProp insertProperty(StyleProp props,
java.lang.String name,
java.lang.String value)
createProps
private StyleProp createProps(StyleProp prop,
java.lang.String style)
createPropString
private java.lang.String createPropString(StyleProp props)
addProperty
private java.lang.String addProperty(java.lang.String style,
java.lang.String property)
gensymClass
private java.lang.String gensymClass(java.lang.String tag)
findStyle
private java.lang.String findStyle(Lexer lexer,
java.lang.String tag,
java.lang.String properties)
style2Rule
private void style2Rule(Lexer lexer,
Node node)
addColorRule
private void addColorRule(Lexer lexer,
java.lang.String selector,
java.lang.String color)
cleanBodyAttrs
private void cleanBodyAttrs(Lexer lexer,
Node body)
niceBody
private boolean niceBody(Lexer lexer,
Node doc)
createStyleElement
private void createStyleElement(Lexer lexer,
Node doc)
fixNodeLinks
private void fixNodeLinks(Node node)
stripOnlyChild
private void stripOnlyChild(Node node)
discardContainer
private Node discardContainer(Node element)
- Discard the container 'element'.
Used to strip font start and end tags.
mergeContainers
private Node mergeContainers(Node element,
Node next)
- Merge two consecutive containers, e.g. <b>...</b><b>...</b>.
Content of 'next' is appended to the content of 'element', and the container 'next' is discarded.
addStyleProperty
private void addStyleProperty(Node node,
java.lang.String property)
mergeProperties
private java.lang.String mergeProperties(java.lang.String s1,
java.lang.String s2)
mergeStyles
private void mergeStyles(Node node,
Node child)
fontSize2Name
private java.lang.String fontSize2Name(java.lang.String size)
addFontFace
private void addFontFace(Node node,
java.lang.String face)
addFontSize
private void addFontSize(Node node,
java.lang.String size)
addFontColor
private void addFontColor(Node node,
java.lang.String color)
addAlign
private void addAlign(Node node,
java.lang.String align)
addFontStyles
private void addFontStyles(Node node,
AttVal av)
textAlign
private void textAlign(Lexer lexer,
Node node)
dir2Div
private boolean dir2Div(Lexer lexer,
Node node,
MutableObject pnode)
center2Div
private boolean center2Div(Lexer lexer,
Node node,
MutableObject pnode)
mergeDivs
private boolean mergeDivs(Lexer lexer,
Node node,
MutableObject pnode)
nestedList
private boolean nestedList(Lexer lexer,
Node node,
MutableObject pnode)
blockStyle
private boolean blockStyle(Lexer lexer,
Node node,
MutableObject pnode)
inlineStyle
private boolean inlineStyle(Lexer lexer,
Node node,
MutableObject pnode)
font2Span
private boolean font2Span(Lexer lexer,
Node node,
MutableObject pnode)
cleanNode
private Node cleanNode(Lexer lexer,
Node node)
createStyleProperties
private Node createStyleProperties(Lexer lexer,
Node node)
defineStyleRules
private void defineStyleRules(Lexer lexer,
Node node)
cleanTree
public void cleanTree(Lexer lexer,
Node doc)
nestedEmphasis
public void nestedEmphasis(Node node)
- Simplifies <b><b> ... </b> ... </b> etc.
Also removes adjacent blocks, e.g. <b>...</b><b>...</b> etc.
emFromI
public void emFromI(Node node)
list2BQ
public void list2BQ(Node node)
bQ2Div
public void bQ2Div(Node node)
pruneSection
public Node pruneSection(Lexer lexer,
Node node)
dropSections
public void dropSections(Lexer lexer,
Node node)
purgeAttributes
public void purgeAttributes(Node node)
stripSpan
public Node stripSpan(Lexer lexer,
Node span)
normalizeSpaces
private void normalizeSpaces(Lexer lexer,
Node node)
cleanWord2000
public void cleanWord2000(Lexer lexer,
Node node)
isWord2000
public boolean isWord2000(Node root,
TagTable tt)