| Method from org.apache.lucene.demo.html.HTMLParser Detail: |
/**
 * Parses one attribute value and returns the token that carries its text.
 *
 * Three forms are accepted: a bare ArgValue token, a value delimited by
 * ArgQuote1 ... CloseQuote1, and a value delimited by ArgQuote2 ... CloseQuote2.
 * In the two branches where the opening quote is directly followed by the
 * closing quote, no text token is consumed and the initial null is returned.
 *
 * @return the ArgValue, Quote1Text or Quote2Text token; null for an empty
 *         quoted value
 * @throws ParseException if the next token starts none of the accepted forms
 */
public final Token ArgValue() throws ParseException {
// Stays null on the empty-quote branches below.
Token t = null;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgValue:
// Unquoted value: the token itself is the result.
t = jj_consume_token(ArgValue);
{if (true) return t;}
break;
default:
jj_la1[5] = jj_gen;
// NOTE(review): jj_2_1 is defined outside this view; presumably it is a
// lookahead check for "ArgQuote1 immediately followed by CloseQuote1".
if (jj_2_1(2)) {
jj_consume_token(ArgQuote1);
jj_consume_token(CloseQuote1);
// Empty quoted value: t was never assigned, so null is returned.
{if (true) return t;}
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgQuote1:
// Quoted value with text between the first kind of quotes.
jj_consume_token(ArgQuote1);
t = jj_consume_token(Quote1Text);
jj_consume_token(CloseQuote1);
{if (true) return t;}
break;
default:
jj_la1[6] = jj_gen;
// NOTE(review): jj_2_2 is defined outside this view; presumably the
// same empty-value lookahead for the second quote style.
if (jj_2_2(2)) {
jj_consume_token(ArgQuote2);
jj_consume_token(CloseQuote2);
// Empty quoted value: null is returned.
{if (true) return t;}
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgQuote2:
// Quoted value with text between the second kind of quotes.
jj_consume_token(ArgQuote2);
t = jj_consume_token(Quote2Text);
jj_consume_token(CloseQuote2);
{if (true) return t;}
break;
default:
// No alternative matched.
jj_la1[7] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
}
}
}
// Every path above returns or throws; the "if (true)" wrappers keep this
// statement reachable for the compiler.
throw new Error("Missing return statement in function");
}
|
/**
 * Consumes one comment in either of its two forms.
 *
 * A comment is an opening token (Comment1 or Comment2), any number of the
 * matching text tokens (CommentText1 or CommentText2) and the matching end
 * token (CommentEnd1 or CommentEnd2). Nothing is returned.
 *
 * @throws ParseException if the next token is neither Comment1 nor Comment2
 */
public final void CommentTag() throws ParseException {
  final int opener = (jj_ntk == -1) ? jj_ntk() : jj_ntk;
  if (opener == Comment1) {
    jj_consume_token(Comment1);
    // Swallow the comment body token by token.
    while (((jj_ntk == -1) ? jj_ntk() : jj_ntk) == CommentText1) {
      jj_consume_token(CommentText1);
    }
    // Record the generation at which the body loop stopped.
    jj_la1[10] = jj_gen;
    jj_consume_token(CommentEnd1);
  } else if (opener == Comment2) {
    jj_consume_token(Comment2);
    while (((jj_ntk == -1) ? jj_ntk() : jj_ntk) == CommentText2) {
      jj_consume_token(CommentText2);
    }
    jj_la1[11] = jj_gen;
    jj_consume_token(CommentEnd2);
  } else {
    // Not a comment opener at all.
    jj_la1[12] = jj_gen;
    jj_consume_token(-1);
    throw new ParseException();
  }
}
|
/**
 * Parses a declaration: a DeclName token, then any sequence of ArgName,
 * attribute values (via ArgValue()) and ArgEquals tokens, then TagEnd.
 *
 * @return the DeclName token consumed at the start
 * @throws ParseException if an unexpected token is encountered
 */
public final Token Decl() throws ParseException {
Token t;
t = jj_consume_token(DeclName);
label_3:
while (true) {
// First switch: decide whether another declaration part follows.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
case ArgEquals:
case ArgValue:
case ArgQuote1:
case ArgQuote2:
;
break;
default:
jj_la1[8] = jj_gen;
break label_3;
}
// Second switch: consume that part. The contents are discarded.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
jj_consume_token(ArgName);
break;
case ArgValue:
case ArgQuote1:
case ArgQuote2:
// The returned value token is ignored here.
ArgValue();
break;
case ArgEquals:
jj_consume_token(ArgEquals);
break;
default:
jj_la1[9] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
jj_consume_token(TagEnd);
{if (true) return t;}
// Never reached at run time; kept reachable for the compiler by the
// "if (true)" wrapper above.
throw new Error("Missing return statement in function");
}
|
/**
 * Top-level production: consumes the whole token stream, dispatching each
 * element to its handler and forwarding text to addText()/addSpace().
 *
 * After a tag, declaration, comment or script block, afterTag is set to true;
 * after a word, entity, punctuation or space token it is set to false.
 *
 * @throws IOException propagated from addText()/addSpace()/Tag()
 * @throws ParseException if an unexpected token is encountered
 */
public final void HTMLDocument() throws IOException, ParseException {
Token t;
label_1:
while (true) {
// First switch: continue only while the next token can start an element.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ScriptStart:
case TagName:
case DeclName:
case Comment1:
case Comment2:
case Word:
case Entity:
case Space:
case Punct:
;
break;
default:
jj_la1[0] = jj_gen;
break label_1;
}
// Second switch: handle the element.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TagName:
Tag();
afterTag = true;
break;
case DeclName:
// The returned DeclName token is assigned but not used afterwards.
t = Decl();
afterTag = true;
break;
case Comment1:
case Comment2:
CommentTag();
afterTag = true;
break;
case ScriptStart:
ScriptTag();
afterTag = true;
break;
case Word:
t = jj_consume_token(Word);
addText(t.image); afterTag = false;
break;
case Entity:
// Entities are passed through Entities.decode() before being emitted.
t = jj_consume_token(Entity);
addText(Entities.decode(t.image)); afterTag = false;
break;
case Punct:
t = jj_consume_token(Punct);
addText(t.image); afterTag = false;
break;
case Space:
// The token image is ignored; addSpace() decides what to write.
jj_consume_token(Space);
addSpace(); afterTag = false;
break;
default:
jj_la1[1] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
// NOTE(review): token kind 0 is presumably end-of-file -- confirm against
// the generated constants.
jj_consume_token(0);
}
|
/**
 * Resets this parser so that it reads from the given byte stream.
 *
 * The character stream is re-initialised at line 1, column 1, the token
 * manager is re-attached to it, and the current token, lookahead bookkeeping
 * and lookahead call records are all reset.
 *
 * @param stream the new input to parse
 */
public void ReInit(InputStream stream) {
  jj_input_stream.ReInit(stream, 1, 1);
  token_source.ReInit(jj_input_stream);

  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;

  // Clear the 14 lookahead-generation slots.
  int slot = 0;
  while (slot < 14) {
    jj_la1[slot] = -1;
    slot++;
  }

  // Start with a fresh call record in every position.
  int call = 0;
  while (call < jj_2_rtns.length) {
    jj_2_rtns[call] = new JJCalls();
    call++;
  }
}
|
/**
 * Resets this parser so that it reads from the given character stream.
 *
 * The character stream is re-initialised at line 1, column 1, the token
 * manager is re-attached to it, and the current token, lookahead bookkeeping
 * and lookahead call records are all reset.
 *
 * @param stream the new input to parse
 */
public void ReInit(Reader stream) {
  jj_input_stream.ReInit(stream, 1, 1);
  token_source.ReInit(jj_input_stream);

  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;

  // Clear the 14 lookahead-generation slots.
  int slot = 0;
  while (slot < 14) {
    jj_la1[slot] = -1;
    slot++;
  }

  // Start with a fresh call record in every position.
  int call = 0;
  while (call < jj_2_rtns.length) {
    jj_2_rtns[call] = new JJCalls();
    call++;
  }
}
|
/**
 * Resets this parser so that it pulls tokens from the given token manager.
 *
 * Unlike the stream overloads, the character stream is not touched; only the
 * token source, the current token, the lookahead bookkeeping and the
 * lookahead call records are replaced.
 *
 * @param tm the token manager to read tokens from
 */
public void ReInit(HTMLParserTokenManager tm) {
  token_source = tm;

  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;

  // Clear the 14 lookahead-generation slots.
  int slot = 0;
  while (slot < 14) {
    jj_la1[slot] = -1;
    slot++;
  }

  // Start with a fresh call record in every position.
  int call = 0;
  while (call < jj_2_rtns.length) {
    jj_2_rtns[call] = new JJCalls();
    call++;
  }
}
|
/**
 * Consumes a script block: ScriptStart, any number of ScriptText tokens,
 * then ScriptEnd. The script content is discarded.
 *
 * @throws ParseException declared by the production
 */
public final void ScriptTag() throws ParseException {
  jj_consume_token(ScriptStart);
  // Skip the script body token by token.
  while (((jj_ntk == -1) ? jj_ntk() : jj_ntk) == ScriptText) {
    jj_consume_token(ScriptText);
  }
  // Record the generation at which the body loop stopped.
  jj_la1[13] = jj_gen;
  jj_consume_token(ScriptEnd);
}
|
/**
 * Parses one tag: a TagName token, zero or more attributes, and TagEnd.
 *
 * Side effects on parser state:
 * - emits a space (addSpace()) when the tag is listed in Tags.WS_ELEMS;
 * - updates inTitle, inMetaTag and inStyle according to the tag name;
 * - for an img tag, emits the alt attribute value as "[value]";
 * - for a meta tag, records the name/http-equiv value and the content value
 *   (both lower-cased) and calls addMetaTag() once both are present.
 *
 * @throws IOException propagated from addSpace()/addText()
 * @throws ParseException if an unexpected token is encountered
 */
public final void Tag() throws IOException, ParseException {
  Token t1, t2;
  boolean inImg = false;
  t1 = jj_consume_token(TagName);
  String tagName = t1.image.toLowerCase();
  if (Tags.WS_ELEMS.contains(tagName)) {
    addSpace();
  }
  // The tag image is compared including its leading '<', with no space
  // between the bracket and the name.
  // NOTE(review): assumes the tokenizer delivers TagName images as "<name"
  // -- confirm against the grammar.
  inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
  inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
  inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
  inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
  label_2:
  while (true) {
    // Continue only while another attribute name follows.
    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
    case ArgName:
      ;
      break;
    default:
      jj_la1[2] = jj_gen;
      break label_2;
    }
    t1 = jj_consume_token(ArgName);
    // Optional "= value" part of the attribute.
    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
    case ArgEquals:
      jj_consume_token(ArgEquals);
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case ArgValue:
      case ArgQuote1:
      case ArgQuote2:
        // t2 is null for an empty quoted value, hence the null checks below.
        t2 = ArgValue();
        if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
          addText("[" + t2.image + "]");
        if (inMetaTag
            && (t1.image.equalsIgnoreCase("name")
                || t1.image.equalsIgnoreCase("HTTP-EQUIV"))
            && t2 != null)
        {
          currentMetaTag = t2.image.toLowerCase();
          // Store the pair as soon as both halves have been seen.
          if (currentMetaTag != null && currentMetaContent != null) {
            addMetaTag();
          }
        }
        if (inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null)
        {
          currentMetaContent = t2.image.toLowerCase();
          if (currentMetaTag != null && currentMetaContent != null) {
            addMetaTag();
          }
        }
        break;
      default:
        jj_la1[3] = jj_gen;
        ;
      }
      break;
    default:
      jj_la1[4] = jj_gen;
      ;
    }
  }
  jj_consume_token(TagEnd);
}
|
/**
 * Stores the pending meta name/content pair in metaTags and clears both
 * pending fields so the next pair starts from scratch.
 */
void addMetaTag() {
  metaTags.setProperty(currentMetaTag, currentMetaContent);
  // Both halves have been used; forget them.
  currentMetaContent = null;
  currentMetaTag = null;
}
|
/**
 * Emits a single separator unless the previous output was already a space.
 *
 * A blank is appended to the title (when inside the title) or to the summary;
 * the pipe receives eol when the separator directly follows a tag and a blank
 * otherwise. length is advanced by the number of characters written.
 *
 * @throws IOException if writing to the pipe fails
 */
void addSpace() throws IOException {
  // Collapse runs of whitespace: nothing to do right after a space.
  if (afterSpace) {
    return;
  }

  if (inTitle) {
    title.append(" ");
  } else {
    addToSummary(" ");
  }

  String separator;
  if (afterTag) {
    separator = eol;
  } else {
    separator = " ";
  }
  length += separator.length();
  pipeOut.write(separator);
  afterSpace = true;
}
|
/**
 * Emits a piece of document text.
 *
 * Text inside a style element is dropped. Text inside the title is appended
 * to the title buffer; any other text goes to the summary. The first time
 * body text arrives after a non-empty title has been collected, titleComplete
 * is set and waiting threads are notified. In both non-style cases the text
 * is also written to the pipe and length is advanced.
 *
 * @param text the text to emit
 * @throws IOException if writing to the pipe fails
 */
void addText(String text) throws IOException {
  if (inStyle)
    return;
  if (inTitle)
    title.append(text);
  else {
    addToSummary(text);
    // title is a character buffer, so comparing it to "" with equals() can
    // never succeed; test its length to find out whether a title was
    // actually collected.
    if (!titleComplete && title.length() > 0) { // finished title
      synchronized(this) {
        titleComplete = true; // tell waiting threads
        notifyAll();
      }
    }
  }
  length += text.length();
  pipeOut.write(text);
  afterSpace = false;
}
|
/**
 * Appends text to the summary while it is still shorter than SUMMARY_LENGTH,
 * and notifies waiting threads at the moment the summary reaches that length.
 *
 * @param text the text to append
 */
void addToSummary(String text) {
  // Summary already long enough: ignore further text.
  if (summary.length() >= SUMMARY_LENGTH) {
    return;
  }

  summary.append(text);

  // Still short after appending: nobody needs to be woken yet.
  if (summary.length() < SUMMARY_LENGTH) {
    return;
  }

  synchronized (this) {
    notifyAll();
  }
}
|
/** Does nothing: the body of this method is empty. */
public final void disable_tracing() {
}
|
/** Does nothing: the body of this method is empty. */
public final void enable_tracing() {
}
|
public ParseException generateParseException() {
jj_expentries.removeAllElements();
boolean[] la1tokens = new boolean[31];
for (int i = 0; i < 31; i++) {
la1tokens[i] = false;
}
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 14; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1< < j)) != 0) {
la1tokens[j] = true;
}
}
}
}
for (int i = 0; i < 31; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
jj_expentries.addElement(jj_expentry);
}
}
jj_endpos = 0;
jj_rescan_token();
jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) {
exptokseq[i] = (int[])jj_expentries.elementAt(i);
}
return new ParseException(token, exptokseq, tokenImage);
}
|
/**
 * Returns the meta tags collected so far, after waiting until either the
 * title has been completed or the input pipe reports that it is full.
 *
 * If parsing has not been started yet, it is started via getReader().
 *
 * @return the metaTags properties object
 * @throws IOException if starting the parser fails
 * @throws InterruptedException if the waiting thread is interrupted
 */
public Properties getMetaTags() throws IOException, InterruptedException {
  if (pipeIn == null) {
    getReader(); // spawn parsing thread
  }

  boolean ready = false;
  while (!ready) {
    synchronized (this) {
      ready = titleComplete || pipeInStream.full();
      if (!ready) {
        // Poll again after at most 10 ms or when notified.
        wait(10);
      }
    }
  }
  return metaTags;
}
|
/**
 * Advances to the next token and returns it.
 *
 * A token already linked through token.next is reused; otherwise a new one is
 * fetched from the token source and linked in. The cached next-token kind is
 * invalidated and the generation counter is incremented.
 *
 * @return the new current token
 */
public final Token getNextToken() {
  // Fetch and link a token only if none has been read ahead.
  if (token.next == null) {
    token.next = token_source.getNextToken();
  }
  token = token.next;

  jj_ntk = -1;
  jj_gen++;
  return token;
}
|
/**
 * Returns the reader from which the parsed text can be read.
 *
 * On the first call the pipe is created (both ends use the "UTF-16BE"
 * encoding) and a ParserThread is started for this parser. Later calls return
 * the same reader.
 *
 * @return the reading end of the pipe
 * @throws IOException if the pipe cannot be set up
 */
public Reader getReader() throws IOException {
  // Already set up: hand out the existing reader.
  if (pipeIn != null) {
    return pipeIn;
  }

  pipeInStream = new MyPipedInputStream();
  pipeOutStream = new PipedOutputStream(pipeInStream);
  pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
  pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");

  Thread parserThread = new ParserThread(this);
  parserThread.start(); // start parsing
  return pipeIn;
}
|
/**
 * Returns a short summary of the document.
 *
 * Waits until the summary has reached SUMMARY_LENGTH characters or the input
 * pipe reports that it is full, truncates the summary to SUMMARY_LENGTH, and
 * trims it. The title is returned instead when the summary is empty or when
 * the summary begins with the (non-empty) title.
 *
 * If parsing has not been started yet, it is started via getReader().
 *
 * @return the trimmed summary, or the title in the cases described above
 * @throws IOException if starting the parser fails
 * @throws InterruptedException if the waiting thread is interrupted
 */
public String getSummary() throws IOException, InterruptedException {
  if (pipeIn == null)
    getReader(); // spawn parsing thread
  while (true) {
    synchronized(this) {
      if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
        break;
      wait(10);
    }
  }
  if (summary.length() > SUMMARY_LENGTH)
    summary.setLength(SUMMARY_LENGTH);
  String sum = summary.toString().trim();
  String tit = getTitle();
  // Every string starts with the empty string, so an empty title must not
  // count as a prefix match; otherwise a document without a title would
  // always yield an empty summary.
  boolean titleLeadsSummary = tit.length() > 0 && sum.startsWith(tit);
  if (titleLeadsSummary || sum.equals(""))
    return tit;
  else
    return sum;
}
|
/**
 * Returns the trimmed document title, after waiting until either the title
 * has been completed or the input pipe reports that it is full.
 *
 * If parsing has not been started yet, it is started via getReader().
 *
 * @return the title text collected so far, trimmed
 * @throws IOException if starting the parser fails
 * @throws InterruptedException if the waiting thread is interrupted
 */
public String getTitle() throws IOException, InterruptedException {
  if (pipeIn == null) {
    getReader(); // spawn parsing thread
  }

  boolean ready = false;
  while (!ready) {
    synchronized (this) {
      ready = titleComplete || pipeInStream.full();
      if (!ready) {
        // Poll again after at most 10 ms or when notified.
        wait(10);
      }
    }
  }

  String collected = title.toString();
  return collected.trim();
}
|
/**
 * Returns the token index positions ahead without consuming anything.
 *
 * Counting starts at jj_scanpos while lookingAhead is set and at the current
 * token otherwise; index 0 returns that starting token. Tokens not yet read
 * are fetched from the token source and linked into the chain.
 *
 * @param index how many tokens to step forward
 * @return the token reached after index steps
 */
public final Token getToken(int index) {
  Token cursor;
  if (lookingAhead) {
    cursor = jj_scanpos;
  } else {
    cursor = token;
  }

  for (int stepsLeft = index; stepsLeft > 0; stepsLeft--) {
    // Extend the chain on demand, then move along it.
    if (cursor.next == null) {
      cursor.next = token_source.getNextToken();
    }
    cursor = cursor.next;
  }
  return cursor;
}
|