Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/htmlparser/RemarkNodeParser.java


1   // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/RemarkNodeParser.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
2   /*
3    * ====================================================================
4    * Copyright 2002-2004 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   * 
18   */
19  
20  // The developers of JMeter and Apache are grateful to the developers
21  // of HTMLParser for giving Apache Software Foundation a non-exclusive
22  // license. The performance benefits of HTMLParser are clear and the
23  // users of JMeter will benefit from the hard work of the HTMLParser
24  // team. For detailed information about HTMLParser, the project is
25  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26  //
27  // HTMLParser was originally created by Somik Raha in 2000. Since then
28  // a healthy community of users has formed and helped refine the
29  // design so that it is able to tackle the difficult task of parsing
30  // dirty HTML. Derrick Oswald is the current lead developer and was kind
31  // enough to assist JMeter.
32  
33  package org.htmlparser;
34  public class RemarkNodeParser
35  {
36      public final static int REMARK_NODE_BEFORE_PARSING_STATE = 0;
37      public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE = 1;
38      public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE = 2;
39      public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE = 3;
40      public final static int REMARK_NODE_ACCEPTING_STATE = 4;
41      public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE = 5;
42      public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE = 6;
43      public final static int REMARK_NODE_ACCEPTED_STATE = 7;
44      public final static int REMARK_NODE_ILLEGAL_STATE = 8;
45      public final static int REMARK_NODE_FINISHED_PARSING_STATE = 2;
46  
47      /**
48       * Locate the remark tag withing the input string, by parsing from the given position
49       * @param reader HTML reader to be provided so as to allow reading of next line
50       * @param input Input String
51       * @param position Position to start parsing from
52       */
53      public RemarkNode find(NodeReader reader, String input, int position)
54      {
55          int state = REMARK_NODE_BEFORE_PARSING_STATE;
56          StringBuffer tagContents = new StringBuffer();
57          int tagBegin = 0;
58          int tagEnd = 0;
59          int i = position;
60          int inputLen = input.length();
61          char ch, prevChar = ' ';
62          while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE)
63          {
64              ch = input.charAt(i);
65              if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE)
66              {
67                  if (ch == '>')
68                  {
69                      state = REMARK_NODE_ACCEPTED_STATE;
70                      tagEnd = i;
71                  }
72                  else if (ch == '-')
73                  {
74                      tagContents.append(prevChar);
75                  }
76                  else
77                  {
78                      // Rollback last 2 characters (assumed same)
79                      state = REMARK_NODE_ACCEPTING_STATE;
80                      tagContents.append(prevChar);
81                      tagContents.append(prevChar);
82                  }
83  
84              }
85  
86              if (state == REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE)
87              {
88                  if (ch == '-')
89                  {
90                      state = REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE;
91                  }
92                  else
93                  {
94                      // Rollback
95                      state = REMARK_NODE_ACCEPTING_STATE;
96                      tagContents.append(prevChar);
97                  }
98              }
99              if (state == REMARK_NODE_ACCEPTING_STATE)
100             {
101                 if (ch == '-')
102                 {
103                     state = REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE;
104                 } /*else
105                         if (ch == '<')
106                         {
107                           state=REMARK_NODE_ILLEGAL_STATE;
108                         } */
109             }
110             if (state == REMARK_NODE_ACCEPTING_STATE)
111             {
112                 // We can append contents now    
113                 tagContents.append(ch);
114             }
115 
116             if (state == REMARK_NODE_FIRST_DASH_RECEIVED_STATE)
117             {
118                 if (ch == '-')
119                 {
120                     state = REMARK_NODE_ACCEPTING_STATE;
121                     // Do a lookahead and see if the next char is >
122                     if (input.length() > i + 1 && input.charAt(i + 1) == '>')
123                     {
124                         state = REMARK_NODE_ACCEPTED_STATE;
125                         tagEnd = i + 1;
126                     }
127                 }
128                 else
129                     state = REMARK_NODE_ILLEGAL_STATE;
130             }
131             if (state == REMARK_NODE_EXCLAMATION_RECEIVED_STATE)
132             {
133                 if (ch == '-')
134                     state = REMARK_NODE_FIRST_DASH_RECEIVED_STATE;
135                 else if (ch == '>')
136                 {
137                     state = REMARK_NODE_ACCEPTED_STATE;
138                     tagEnd = i;
139                 }
140                 else
141                     state = REMARK_NODE_ILLEGAL_STATE;
142             }
143             if (state == REMARK_NODE_OPENING_ANGLE_BRACKET_STATE)
144             {
145                 if (ch == '!')
146                     state = REMARK_NODE_EXCLAMATION_RECEIVED_STATE;
147                 else
148                     state = REMARK_NODE_ILLEGAL_STATE;
149                 // This is not a remark tag
150             }
151             if (state == REMARK_NODE_BEFORE_PARSING_STATE)
152             {
153                 if (ch == '<')
154                 {
155                     // Transition from State 0 to State 1 - Record data till > is encountered
156                     tagBegin = i;
157                     state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE;
158                 }
159                 else if (ch != ' ')
160                 {
161                     // Its not a space, hence this is probably a string node, not a remark node
162                     state = REMARK_NODE_ILLEGAL_STATE;
163                 }
164             }
165             //      if (state > REMARK_NODE_OPENING_ANGLE_BRACKET_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1)
166             if (state >= REMARK_NODE_ACCEPTING_STATE
167                 && state < REMARK_NODE_ACCEPTED_STATE
168                 && i == input.length() - 1)
169             {
170                 // We need to continue parsing to the next line
171                 //input = reader.getNextLine();
172                 tagContents.append(Node.getLineSeparator());
173                 do
174                 {
175                     input = reader.getNextLine();
176                 }
177                 while (input != null && input.length() == 0);
178                 if (input != null)
179                     inputLen = input.length();
180                 else
181                     inputLen = -1;
182                 i = -1;
183             }
184             if (state == REMARK_NODE_ILLEGAL_STATE)
185             {
186                 return null;
187             }
188             i++;
189             prevChar = ch;
190         }
191         if (state == REMARK_NODE_ACCEPTED_STATE)
192             return new RemarkNode(tagBegin, tagEnd, tagContents.toString());
193         else
194             return null;
195     }
196 }