Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

com.telefonicasoluciones.search.server.parser.pdf
Class PDFHandler  view PDFHandler download PDFHandler.java

java.lang.Object
  extended by com.telefonicasoluciones.search.server.parser.pdf.PDFHandler

public class PDFHandler
extends java.lang.Object


Field Summary
private  java.lang.String author
           
private static char[] AUTHOR
           
private  byte[] buf
           
private  int compression
           
private  java.lang.StringBuffer contents
           
private static char[] CREATIONDATE
           
private  int currentPosition
           
(package private)  java.text.SimpleDateFormat dateFormatter
           
private  java.lang.String description
           
private static char[] ENDSTREAM
           
private static int FLATE
           
private  java.io.InputStream in
           
private  java.lang.String keywords
           
private static char[] KEYWORDS
           
private  int len
           
private static int LZW
           
private static char[] NEWLINE
           
private static int NONE
           
private static char[] PARAMSTART
           
private  boolean parseNextStream
           
private  int pos
           
private  long published
           
private static char[] RETURN
           
private static char[] STREAM
           
private  boolean streamHit
           
private static char[] SUBJECT
           
private  java.lang.String title
           
private static char[] TITLE
           
private static char[][] tokens
           
 
Constructor Summary
PDFHandler()
          PdfParser constructor comment.
 
Method Summary
private  char[] findToken()
          Look for tokens.
 java.lang.String getAuthor()
          Parse Content.
 java.lang.String getCategories()
          Return categories (from META tags)
 java.lang.String getContents()
          Parse Content.
 java.lang.String getDescription()
          Parse Content.
 java.lang.String getHREF()
          Return META HREF
 java.lang.String getKeywords()
          Parse Content.
 java.util.List getLinks()
          Return links
 long getPublished()
          Parse Content.
 boolean getRobotFollow()
          Return boolean true if links are to be followed
 boolean getRobotIndex()
          Return boolean true if this is to be indexed
 java.lang.String getTitle()
          Parse Content.
private  boolean isNewLineChar(char ch)
          Check for new line chars
private  boolean nextLine()
          Parse Content.
 void parse(java.io.InputStream in)
          Parse Content.
private  void parseContent()
          Parse Content.
private  java.lang.String parseData()
          Look for tokens.
private  java.lang.String parseDataParams()
          Look for tokens.
private  java.lang.String parseDataStream()
          Look for tokens.
private  long parseDate()
          Look for tokens.
private  int read()
           
private  char readCh()
           
private  void reset()
          Return contents
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

in

private java.io.InputStream in

buf

private byte[] buf

pos

private int pos

len

private int len

currentPosition

private int currentPosition

dateFormatter

java.text.SimpleDateFormat dateFormatter

author

private java.lang.String author

published

private long published

keywords

private java.lang.String keywords

description

private java.lang.String description

title

private java.lang.String title

contents

private java.lang.StringBuffer contents

streamHit

private boolean streamHit

parseNextStream

private boolean parseNextStream

NONE

private static final int NONE
See Also:
Constant Field Values

FLATE

private static final int FLATE
See Also:
Constant Field Values

LZW

private static final int LZW
See Also:
Constant Field Values

compression

private int compression

AUTHOR

private static final char[] AUTHOR

CREATIONDATE

private static final char[] CREATIONDATE

ENDSTREAM

private static final char[] ENDSTREAM

KEYWORDS

private static final char[] KEYWORDS

STREAM

private static final char[] STREAM

SUBJECT

private static final char[] SUBJECT

TITLE

private static final char[] TITLE

NEWLINE

private static final char[] NEWLINE

RETURN

private static final char[] RETURN

PARAMSTART

private static final char[] PARAMSTART

tokens

private static final char[][] tokens
Constructor Detail

PDFHandler

public PDFHandler()
PdfParser constructor comment.

Method Detail

findToken

private char[] findToken()
                  throws java.io.IOException
Look for tokens. This is not efficient. Should use low, hi method with ordered array. NEED TO RECODE


getAuthor

public java.lang.String getAuthor()
Parse Content. [24] 320:1


getCategories

public java.lang.String getCategories()
Return categories (from META tags)


getContents

public java.lang.String getContents()
Parse Content. [24] 320:1


getDescription

public java.lang.String getDescription()
Parse Content. [24] 320:1


getHREF

public java.lang.String getHREF()
Return META HREF


getKeywords

public java.lang.String getKeywords()
Parse Content. [24] 320:1


getLinks

public java.util.List getLinks()
Return links


getPublished

public long getPublished()
Parse Content. [24] 320:1


getRobotFollow

public boolean getRobotFollow()
Return boolean true if links are to be followed


getRobotIndex

public boolean getRobotIndex()
Return boolean true if this is to be indexed


getTitle

public java.lang.String getTitle()
Parse Content. [24] 320:1


isNewLineChar

private boolean isNewLineChar(char ch)
Check for new line chars


nextLine

private boolean nextLine()
                  throws java.io.IOException
Parse Content. [24] 320:1


parse

public void parse(java.io.InputStream in)
Parse Content.


parseContent

private void parseContent()
                   throws java.io.IOException
Parse Content. [24] 320:1


parseData

private java.lang.String parseData()
                            throws java.io.IOException
Look for tokens. This is not efficient. Should use low, hi method with ordered array. NEED TO RECODE


parseDataParams

private java.lang.String parseDataParams()
                                  throws java.io.IOException
Look for tokens. This is not efficient. Should use low, hi method with ordered array. NEED TO RECODE


parseDataStream

private java.lang.String parseDataStream()
                                  throws java.io.IOException
Look for tokens. This is not efficient. Should use low, hi method with ordered array. NEED TO RECODE


parseDate

private long parseDate()
                throws java.io.IOException
Look for tokens. This is not efficient. Should use low, hi method with ordered array. NEED TO RECODE


read

private final int read()
                throws java.io.IOException

readCh

private final char readCh()
                   throws java.io.IOException

reset

private void reset()
Return contents