Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

org.htmlparser.visitors
Class TextExtractingVisitor  view TextExtractingVisitor download TextExtractingVisitor.java

java.lang.Object
  extended by org.htmlparser.visitors.NodeVisitor
      extended by org.htmlparser.visitors.TextExtractingVisitor

public class TextExtractingVisitor
extends NodeVisitor

Extracts text from a web page. Usage:

    Parser parser = new Parser(...);
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    parser.visitAllNodesWith(visitor);
    String textInPage = visitor.getExtractedText();


Field Summary
private  boolean preTagBeingProcessed
           
private  java.lang.StringBuffer textAccumulator
           
 
Fields inherited from class org.htmlparser.visitors.NodeVisitor
 
Constructor Summary
TextExtractingVisitor()
           
 
Method Summary
 java.lang.String getExtractedText()
           
private  boolean isPreTag(org.htmlparser.tags.Tag tag)
           
private  java.lang.String replaceNonBreakingSpaceWithOrdinarySpace(java.lang.String text)
           
 void visitEndTag(org.htmlparser.tags.EndTag endTag)
           
 void visitStringNode(org.htmlparser.StringNode stringNode)
           
 void visitTag(org.htmlparser.tags.Tag tag)
           
 void visitTitleTag(org.htmlparser.tags.TitleTag titleTag)
           
 
Methods inherited from class org.htmlparser.visitors.NodeVisitor
finishedParsing, shouldRecurseChildren, shouldRecurseSelf, visitImageTag, visitLinkTag, visitRemarkNode
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

textAccumulator

private java.lang.StringBuffer textAccumulator

preTagBeingProcessed

private boolean preTagBeingProcessed
Constructor Detail

TextExtractingVisitor

public TextExtractingVisitor()
Method Detail

getExtractedText

public java.lang.String getExtractedText()

visitStringNode

public void visitStringNode(org.htmlparser.StringNode stringNode)
Overrides:
visitStringNode in class NodeVisitor

visitTitleTag

public void visitTitleTag(org.htmlparser.tags.TitleTag titleTag)
Overrides:
visitTitleTag in class NodeVisitor

replaceNonBreakingSpaceWithOrdinarySpace

private java.lang.String replaceNonBreakingSpaceWithOrdinarySpace(java.lang.String text)

visitEndTag

public void visitEndTag(org.htmlparser.tags.EndTag endTag)
Overrides:
visitEndTag in class NodeVisitor

visitTag

public void visitTag(org.htmlparser.tags.Tag tag)
Overrides:
visitTag in class NodeVisitor

isPreTag

private boolean isPreTag(org.htmlparser.tags.Tag tag)