Source code: org/htmlparser/RemarkNodeParser.java
1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/RemarkNodeParser.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
2 /*
3 * ====================================================================
4 * Copyright 2002-2004 The Apache Software Foundation.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19
20 // The developers of JMeter and Apache are grateful to the developers
21 // of HTMLParser for giving Apache Software Foundation a non-exclusive
22 // license. The performance benefits of HTMLParser are clear and the
23 // users of JMeter will benefit from the hard work of the HTMLParser
24 // team. For detailed information about HTMLParser, the project is
25 // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26 //
27 // HTMLParser was originally created by Somik Raha in 2000. Since then
28 // a healthy community of users has formed and helped refine the
29 // design so that it is able to tackle the difficult task of parsing
30 // dirty HTML. Derrick Oswald is the current lead developer and was kind
31 // enough to assist JMeter.
32
33 package org.htmlparser;
34 public class RemarkNodeParser
35 {
36 public final static int REMARK_NODE_BEFORE_PARSING_STATE = 0;
37 public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE = 1;
38 public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE = 2;
39 public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE = 3;
40 public final static int REMARK_NODE_ACCEPTING_STATE = 4;
41 public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE = 5;
42 public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE = 6;
43 public final static int REMARK_NODE_ACCEPTED_STATE = 7;
44 public final static int REMARK_NODE_ILLEGAL_STATE = 8;
45 public final static int REMARK_NODE_FINISHED_PARSING_STATE = 2;
46
47 /**
48 * Locate the remark tag withing the input string, by parsing from the given position
49 * @param reader HTML reader to be provided so as to allow reading of next line
50 * @param input Input String
51 * @param position Position to start parsing from
52 */
53 public RemarkNode find(NodeReader reader, String input, int position)
54 {
55 int state = REMARK_NODE_BEFORE_PARSING_STATE;
56 StringBuffer tagContents = new StringBuffer();
57 int tagBegin = 0;
58 int tagEnd = 0;
59 int i = position;
60 int inputLen = input.length();
61 char ch, prevChar = ' ';
62 while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE)
63 {
64 ch = input.charAt(i);
65 if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE)
66 {
67 if (ch == '>')
68 {
69 state = REMARK_NODE_ACCEPTED_STATE;
70 tagEnd = i;
71 }
72 else if (ch == '-')
73 {
74 tagContents.append(prevChar);
75 }
76 else
77 {
78 // Rollback last 2 characters (assumed same)
79 state = REMARK_NODE_ACCEPTING_STATE;
80 tagContents.append(prevChar);
81 tagContents.append(prevChar);
82 }
83
84 }
85
86 if (state == REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE)
87 {
88 if (ch == '-')
89 {
90 state = REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE;
91 }
92 else
93 {
94 // Rollback
95 state = REMARK_NODE_ACCEPTING_STATE;
96 tagContents.append(prevChar);
97 }
98 }
99 if (state == REMARK_NODE_ACCEPTING_STATE)
100 {
101 if (ch == '-')
102 {
103 state = REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE;
104 } /*else
105 if (ch == '<')
106 {
107 state=REMARK_NODE_ILLEGAL_STATE;
108 } */
109 }
110 if (state == REMARK_NODE_ACCEPTING_STATE)
111 {
112 // We can append contents now
113 tagContents.append(ch);
114 }
115
116 if (state == REMARK_NODE_FIRST_DASH_RECEIVED_STATE)
117 {
118 if (ch == '-')
119 {
120 state = REMARK_NODE_ACCEPTING_STATE;
121 // Do a lookahead and see if the next char is >
122 if (input.length() > i + 1 && input.charAt(i + 1) == '>')
123 {
124 state = REMARK_NODE_ACCEPTED_STATE;
125 tagEnd = i + 1;
126 }
127 }
128 else
129 state = REMARK_NODE_ILLEGAL_STATE;
130 }
131 if (state == REMARK_NODE_EXCLAMATION_RECEIVED_STATE)
132 {
133 if (ch == '-')
134 state = REMARK_NODE_FIRST_DASH_RECEIVED_STATE;
135 else if (ch == '>')
136 {
137 state = REMARK_NODE_ACCEPTED_STATE;
138 tagEnd = i;
139 }
140 else
141 state = REMARK_NODE_ILLEGAL_STATE;
142 }
143 if (state == REMARK_NODE_OPENING_ANGLE_BRACKET_STATE)
144 {
145 if (ch == '!')
146 state = REMARK_NODE_EXCLAMATION_RECEIVED_STATE;
147 else
148 state = REMARK_NODE_ILLEGAL_STATE;
149 // This is not a remark tag
150 }
151 if (state == REMARK_NODE_BEFORE_PARSING_STATE)
152 {
153 if (ch == '<')
154 {
155 // Transition from State 0 to State 1 - Record data till > is encountered
156 tagBegin = i;
157 state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE;
158 }
159 else if (ch != ' ')
160 {
161 // Its not a space, hence this is probably a string node, not a remark node
162 state = REMARK_NODE_ILLEGAL_STATE;
163 }
164 }
165 // if (state > REMARK_NODE_OPENING_ANGLE_BRACKET_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1)
166 if (state >= REMARK_NODE_ACCEPTING_STATE
167 && state < REMARK_NODE_ACCEPTED_STATE
168 && i == input.length() - 1)
169 {
170 // We need to continue parsing to the next line
171 //input = reader.getNextLine();
172 tagContents.append(Node.getLineSeparator());
173 do
174 {
175 input = reader.getNextLine();
176 }
177 while (input != null && input.length() == 0);
178 if (input != null)
179 inputLen = input.length();
180 else
181 inputLen = -1;
182 i = -1;
183 }
184 if (state == REMARK_NODE_ILLEGAL_STATE)
185 {
186 return null;
187 }
188 i++;
189 prevChar = ch;
190 }
191 if (state == REMARK_NODE_ACCEPTED_STATE)
192 return new RemarkNode(tagBegin, tagEnd, tagContents.toString());
193 else
194 return null;
195 }
196 }