Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

com.port80.html.tidy
Class Clean  view Clean download Clean.java

java.lang.Object
  extended by com.port80.html.tidy.Clean

public class Clean
extends java.lang.Object

Filters from other formats such as Microsoft Word often make excessive use of presentation markup such as font tags, B, I, and the align attribute. By applying a set of production rules, it is straightforward to transform this to use CSS. Some rules replace some of the children of an element by style properties on the element, e.g.

<p><b>...</b></p> -> <p style="font-weight: bold">...</p>

Such rules are applied to the element's content and then to the element itself until no more rules apply. Having applied all the rules to an element, it will have a style attribute with one or more properties. Other rules strip the element they apply to, replacing it by style properties on the contents, e.g.
    <dir><li><p>...</li></dir> -> <p style="margin-left 1em">...

    These rules are applied to an element before processing its content and replace the current element by the first element in the exposed content. After applying both sets of rules, you can replace the style attribute by a class value and style rule in the document head. To support this, an association of styles and class names is built. A naive approach is to rely on string matching to test when two property lists are the same. A better approach would be to first sort the properties before matching.


    Field Summary
    private  int classNum
               
    private static boolean FIXME
               
    private static java.lang.String NAME
               
    private  TagTable tt
               
     
    Constructor Summary
    Clean(TagTable tt)
               
     
    Method Summary
    private  void addAlign(Node node, java.lang.String align)
               
    private  void addColorRule(Lexer lexer, java.lang.String selector, java.lang.String color)
               
    private  void addFontColor(Node node, java.lang.String color)
               
    private  void addFontFace(Node node, java.lang.String face)
               
    private  void addFontSize(Node node, java.lang.String size)
               
    private  void addFontStyles(Node node, AttVal av)
               
    private  java.lang.String addProperty(java.lang.String style, java.lang.String property)
               
    private  void addStyleProperty(Node node, java.lang.String property)
               
    private  boolean blockStyle(Lexer lexer, Node node, MutableObject pnode)
               
     void bQ2Div(Node node)
               
    private  boolean center2Div(Lexer lexer, Node node, MutableObject pnode)
               
    private  void cleanBodyAttrs(Lexer lexer, Node body)
               
    private  Node cleanNode(Lexer lexer, Node node)
               
     void cleanTree(Lexer lexer, Node doc)
               
     void cleanWord2000(Lexer lexer, Node node)
               
    private  StyleProp createProps(StyleProp prop, java.lang.String style)
               
    private  java.lang.String createPropString(StyleProp props)
               
    private  void createStyleElement(Lexer lexer, Node doc)
               
    private  Node createStyleProperties(Lexer lexer, Node node)
               
    private  void defineStyleRules(Lexer lexer, Node node)
               
    private  boolean dir2Div(Lexer lexer, Node node, MutableObject pnode)
               
    private  Node discardContainer(Node element)
              Discard the container 'element'.
     void dropSections(Lexer lexer, Node node)
               
     void emFromI(Node node)
               
    private  java.lang.String findStyle(Lexer lexer, java.lang.String tag, java.lang.String properties)
               
    private  void fixNodeLinks(Node node)
               
    private  boolean font2Span(Lexer lexer, Node node, MutableObject pnode)
               
    private  java.lang.String fontSize2Name(java.lang.String size)
               
    private  java.lang.String gensymClass(java.lang.String tag)
               
    private  boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode)
               
    private  StyleProp insertProperty(StyleProp props, java.lang.String name, java.lang.String value)
               
     boolean isWord2000(Node root, TagTable tt)
               
     void list2BQ(Node node)
               
    private  Node mergeContainers(Node element, Node next)
              Merge two consecutive containers, e.g. <b>...</b><b>...</b>.
    private  boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode)
               
    private  java.lang.String mergeProperties(java.lang.String s1, java.lang.String s2)
               
    private  void mergeStyles(Node node, Node child)
               
     void nestedEmphasis(Node node)
              Simplifies <b><b> ... </b> ... </b> etc.
    private  boolean nestedList(Lexer lexer, Node node, MutableObject pnode)
               
    private  boolean niceBody(Lexer lexer, Node doc)
               
    private  void normalizeSpaces(Lexer lexer, Node node)
               
     Node pruneSection(Lexer lexer, Node node)
               
     void purgeAttributes(Node node)
               
    private  void stripOnlyChild(Node node)
               
     Node stripSpan(Lexer lexer, Node span)
               
    private  void style2Rule(Lexer lexer, Node node)
               
    private  void textAlign(Lexer lexer, Node node)
               
     
    Methods inherited from class java.lang.Object
    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
     

    Field Detail

    NAME

    private static final java.lang.String NAME
    See Also:
    Constant Field Values

    FIXME

    private static final boolean FIXME
    See Also:
    Constant Field Values

    classNum

    private int classNum

    tt

    private TagTable tt
    Constructor Detail

    Clean

    public Clean(TagTable tt)
    Method Detail

    insertProperty

    private StyleProp insertProperty(StyleProp props,
                                     java.lang.String name,
                                     java.lang.String value)

    createProps

    private StyleProp createProps(StyleProp prop,
                                  java.lang.String style)

    createPropString

    private java.lang.String createPropString(StyleProp props)

    addProperty

    private java.lang.String addProperty(java.lang.String style,
                                         java.lang.String property)

    gensymClass

    private java.lang.String gensymClass(java.lang.String tag)

    findStyle

    private java.lang.String findStyle(Lexer lexer,
                                       java.lang.String tag,
                                       java.lang.String properties)

    style2Rule

    private void style2Rule(Lexer lexer,
                            Node node)

    addColorRule

    private void addColorRule(Lexer lexer,
                              java.lang.String selector,
                              java.lang.String color)

    cleanBodyAttrs

    private void cleanBodyAttrs(Lexer lexer,
                                Node body)

    niceBody

    private boolean niceBody(Lexer lexer,
                             Node doc)

    createStyleElement

    private void createStyleElement(Lexer lexer,
                                    Node doc)

    fixNodeLinks

    private void fixNodeLinks(Node node)

    stripOnlyChild

    private void stripOnlyChild(Node node)

    discardContainer

    private Node discardContainer(Node element)
    Discard the container 'element'. Used to strip font start and end tags.


    mergeContainers

    private Node mergeContainers(Node element,
                                 Node next)
    Merge two consecutive containers, e.g. <b>...</b><b>...</b>. Content of 'next' is appended to the content of 'element' and container 'next' is discarded.


    addStyleProperty

    private void addStyleProperty(Node node,
                                  java.lang.String property)

    mergeProperties

    private java.lang.String mergeProperties(java.lang.String s1,
                                             java.lang.String s2)

    mergeStyles

    private void mergeStyles(Node node,
                             Node child)

    fontSize2Name

    private java.lang.String fontSize2Name(java.lang.String size)

    addFontFace

    private void addFontFace(Node node,
                             java.lang.String face)

    addFontSize

    private void addFontSize(Node node,
                             java.lang.String size)

    addFontColor

    private void addFontColor(Node node,
                              java.lang.String color)

    addAlign

    private void addAlign(Node node,
                          java.lang.String align)

    addFontStyles

    private void addFontStyles(Node node,
                               AttVal av)

    textAlign

    private void textAlign(Lexer lexer,
                           Node node)

    dir2Div

    private boolean dir2Div(Lexer lexer,
                            Node node,
                            MutableObject pnode)

    center2Div

    private boolean center2Div(Lexer lexer,
                               Node node,
                               MutableObject pnode)

    mergeDivs

    private boolean mergeDivs(Lexer lexer,
                              Node node,
                              MutableObject pnode)

    nestedList

    private boolean nestedList(Lexer lexer,
                               Node node,
                               MutableObject pnode)

    blockStyle

    private boolean blockStyle(Lexer lexer,
                               Node node,
                               MutableObject pnode)

    inlineStyle

    private boolean inlineStyle(Lexer lexer,
                                Node node,
                                MutableObject pnode)

    font2Span

    private boolean font2Span(Lexer lexer,
                              Node node,
                              MutableObject pnode)

    cleanNode

    private Node cleanNode(Lexer lexer,
                           Node node)

    createStyleProperties

    private Node createStyleProperties(Lexer lexer,
                                       Node node)

    defineStyleRules

    private void defineStyleRules(Lexer lexer,
                                  Node node)

    cleanTree

    public void cleanTree(Lexer lexer,
                          Node doc)

    nestedEmphasis

    public void nestedEmphasis(Node node)
    Simplifies <b><b> ... </b> ... </b> etc. Also removes adjacent blocks, e.g. <b> ... </b><b> ... </b> etc.


    emFromI

    public void emFromI(Node node)

    list2BQ

    public void list2BQ(Node node)

    bQ2Div

    public void bQ2Div(Node node)

    pruneSection

    public Node pruneSection(Lexer lexer,
                             Node node)

    dropSections

    public void dropSections(Lexer lexer,
                             Node node)

    purgeAttributes

    public void purgeAttributes(Node node)

    stripSpan

    public Node stripSpan(Lexer lexer,
                          Node span)

    normalizeSpaces

    private void normalizeSpaces(Lexer lexer,
                                 Node node)

    cleanWord2000

    public void cleanWord2000(Lexer lexer,
                              Node node)

    isWord2000

    public boolean isWord2000(Node root,
                              TagTable tt)