1 /*
2 * IF YOU ARE HAVING TROUBLE COMPILING THIS CLASS, IT IS PROBABLY BECAUSE Lexer.java IS MISSING.
3 *
4 * Use 'ant jflex' to generate the file, which will reside in build/java
5 */
6
7 package com.opensymphony.module.sitemesh.html.tokenizer;
8
9 import com.opensymphony.module.sitemesh.html.Tag;
10 import com.opensymphony.module.sitemesh.html.Text;
11 import com.opensymphony.module.sitemesh.html.util.CharArray;
12 import com.opensymphony.module.sitemesh.util.CharArrayReader;
13
14 import java.io.IOException;
15 import java.util.ArrayList;
16 import java.util.List;
17
18 /**
19 * Looks for patterns of tokens in the Lexer and translates these to calls to pass to the TokenHandler.
20 *
21 * @author Joe Walnes
22 * @see TagTokenizer
23 */
24 class Parser extends Lexer implements Text, Tag {
25
26 private final CharArray attributeBuffer = new CharArray(64);
27 private int pushbackToken = -1;
28 private String pushbackText;
29
30 public final static short SLASH=257;
31 public final static short WHITESPACE=258;
32 public final static short EQUALS=259;
33 public final static short QUOTE=260;
34 public final static short WORD=261;
35 public final static short TEXT=262;
36 public final static short QUOTED=263;
37 public final static short LT=264;
38 public final static short GT=265;
39 public final static short LT_OPEN_MAGIC_COMMENT=266;
40 public final static short LT_CLOSE_MAGIC_COMMENT=267;
41
42 private final char[] input;
43
44 private TokenHandler handler;
45
46 private int position;
47 private int length;
48
49 private String name;
50 private int type;
51 private final List attributes = new ArrayList();
52
53 public Parser(char[] input, TokenHandler handler) {
54 super(new CharArrayReader(input));
55 this.input = input;
56 this.handler = handler;
57 }
58
59 private String text() {
60 if (pushbackToken == -1) {
61 return yytext();
62 } else {
63 return pushbackText;
64 }
65 }
66
67 private void skipWhiteSpace() throws IOException {
68 while (true) {
69 int next;
70 if (pushbackToken == -1) {
71 next = yylex();
72 } else {
73 next = pushbackToken;
74 pushbackToken = -1;
75 }
76 if (next != Parser.WHITESPACE) {
77 pushBack(next);
78 break;
79 }
80 }
81 }
82
83 private void pushBack(int next) {
84 if (pushbackToken != -1) {
85 reportError("Cannot pushback more than once", line(), column());
86 }
87 pushbackToken = next;
88 if (next == Parser.WORD || next == Parser.QUOTED || next == Parser.SLASH || next == Parser.EQUALS) {
89 pushbackText = yytext();
90 } else {
91 pushbackText = null;
92 }
93 }
94
95 public void start() {
96 try {
97 while (true) {
98 int token;
99 if (pushbackToken == -1) {
100 token = yylex();
101 } else {
102 token = pushbackToken;
103 pushbackToken = -1;
104 }
105 if (token == 0) {
106 // EOF
107 return;
108 } else if (token == Parser.TEXT) {
109 // Got some text
110 parsedText(position(), length());
111 } else if (token == Parser.LT) {
112 // Token "<" - start of tag
113 parseTag(Tag.OPEN);
114 } else if (token == Parser.LT_OPEN_MAGIC_COMMENT) {
115 // Token "<!--[" - start of open magic comment
116 parseTag(Tag.OPEN_MAGIC_COMMENT);
117 } else if (token == Parser.LT_CLOSE_MAGIC_COMMENT) {
118 // Token "<![" - start of close magic comment
119 parseTag(Tag.CLOSE_MAGIC_COMMENT);
120 } else {
121 reportError("Unexpected token from lexer, was expecting TEXT or LT", line(), column());
122 }
123 }
124 } catch (IOException e) {
125 throw new RuntimeException(e);
126 }
127 }
128
129 private void parseTag(int type) throws IOException {
130 // Start parsing a TAG
131
132 int start = position();
133 skipWhiteSpace();
134 int token;
135 if (pushbackToken == -1) {
136 token = yylex();
137 } else {
138 token = pushbackToken;
139 pushbackToken = -1;
140 }
141 String name;
142
143 if (token == Parser.SLASH) {
144 // Token "/" - it's a closing tag
145 type = Tag.CLOSE;
146 if (pushbackToken == -1) {
147 token = yylex();
148 } else {
149 token = pushbackToken;
150 pushbackToken = -1;
151 }
152 }
153
154 if (token == Parser.WORD) {
155 // Token WORD - name of tag
156 name = text();
157
158 if (handler.shouldProcessTag(name)) {
159 parseFullTag(type, name, start);
160 } else {
161
162 // don't care about this tag... scan to the end and treat it as text
163 while(true) {
164 if (pushbackToken == -1) {
165 token = yylex();
166 } else {
167 token = pushbackToken;
168 pushbackToken = -1;
169 }
170 if (token == Parser.GT) {
171 pushBack(yylex()); // take and replace the next token, so the position is correct
172 parsedText(start, position() - start);
173 return;
174 }
175 }
176 }
177
178 } else if (token == Parser.GT) {
179 // Token ">" - an illegal <> or < > tag. Ignore
180 } else {
181 reportError("Could not recognise tag", line(), column());
182 }
183 }
184
185 private void parseFullTag(int type, String name, int start) throws IOException {
186 int token;
187 while (true) {
188 skipWhiteSpace();
189 if (pushbackToken == -1) {
190 token = yylex();
191 } else {
192 token = pushbackToken;
193 pushbackToken = -1;
194 }
195 pushBack(token);
196
197 if (token == Parser.SLASH || token == Parser.GT) {
198 break; // no more attributes here
199 } else if (token == Parser.WORD) {
200 parseAttribute(); // start of an attribute
201 } else {
202 reportError("XXY", line(), column());
203 }
204 }
205
206 if (pushbackToken == -1) {
207 token = yylex();
208 } else {
209 token = pushbackToken;
210 pushbackToken = -1;
211 }
212 if (token == Parser.SLASH) {
213 // Token "/" - it's an empty tag
214 type = Tag.EMPTY;
215 if (pushbackToken == -1) {
216 token = yylex();
217 } else {
218 token = pushbackToken;
219 pushbackToken = -1;
220 }
221 }
222
223 if (token == Parser.GT) {
224 // Token ">" - YAY! end of tag.. process it!
225 parsedTag(type, name, start, position() - start + 1);
226 } else {
227 reportError("Expected end of tag", line(), column());
228 }
229 }
230
231 private void parseAttribute() throws IOException {
232 int token;
233 if (pushbackToken == -1) {
234 token = yylex();
235 } else {
236 token = pushbackToken;
237 pushbackToken = -1;
238 }
239 // Token WORD - start of an attribute
240 String attributeName = text();
241 skipWhiteSpace();
242 if (pushbackToken == -1) {
243 token = yylex();
244 } else {
245 token = pushbackToken;
246 pushbackToken = -1;
247 }
248 if (token == Parser.EQUALS) {
249 // Token "=" - the attribute has a value
250 skipWhiteSpace();
251 if (pushbackToken == -1) {
252 token = yylex();
253 } else {
254 token = pushbackToken;
255 pushbackToken = -1;
256 }
257 if (token == Parser.QUOTED) {
258 // token QUOTED - a quoted literal as the attribute value
259 parsedAttribute(attributeName, text(), true);
260 } else if (token == Parser.WORD || token == Parser.SLASH) {
261 // unquoted word
262 attributeBuffer.clear();
263 attributeBuffer.append(text());
264 while (true) {
265 int next;
266 if (pushbackToken == -1) {
267 next = yylex();
268 } else {
269 next = pushbackToken;
270 pushbackToken = -1;
271 }
272 if (next == Parser.WORD || next == Parser.EQUALS || next == Parser.SLASH) {
273 attributeBuffer.append(text());
274 // TODO: how to handle <a x=c/> ?
275 } else {
276 pushBack(next);
277 break;
278 }
279 }
280 parsedAttribute(attributeName, attributeBuffer.toString(), false);
281 } else if (token == Parser.SLASH || token == Parser.GT) {
282 // no more attributes
283 pushBack(token);
284 } else {
285 reportError("Illegal attribute value", line(), column());
286 }
287 } else if (token == Parser.SLASH || token == Parser.GT || token == Parser.WORD) {
288 // it was a value-less HTML style attribute
289 parsedAttribute(attributeName, null, false);
290 pushBack(token);
291 } else {
292 reportError("Illegal attribute name", line(), column());
293 }
294 }
295
296 public void parsedText(int position, int length) {
297 this.position = position;
298 this.length = length;
299 handler.text((Text) this);
300 }
301
302 public void parsedTag(int type, String name, int start, int length) {
303 this.type = type;
304 this.name = name;
305 this.position = start;
306 this.length = length;
307 handler.tag((Tag) this);
308 attributes.clear();
309 }
310
311 public void parsedAttribute(String name, String value, boolean quoted) {
312 attributes.add(name);
313 if (quoted) {
314 attributes.add(value.substring(1, value.length() - 1));
315 } else {
316 attributes.add(value);
317 }
318 }
319
320 protected void reportError(String message, int line, int column) {
321 // System.out.println(message);
322 handler.warning(message, line, column);
323 }
324
325 public String getName() {
326 return name;
327 }
328
329 public int getType() {
330 return type;
331 }
332
333 public String getContents() {
334 return new String(input, position, length);
335 }
336
337 public void writeTo(CharArray out) {
338 out.append(input, position, length);
339 }
340
341 public int getAttributeCount() {
342 return attributes == null ? 0 : attributes.size() / 2;
343 }
344
345 public String getAttributeName(int index) {
346 return (String) attributes.get(index * 2);
347 }
348
349 public String getAttributeValue(int index) {
350 return (String) attributes.get(index * 2 + 1);
351 }
352
353 public String getAttributeValue(String name) {
354 // todo: optimize
355 if (attributes == null) {
356 return null;
357 }
358 final int len = attributes.size();
359 for (int i = 0; i < len; i+=2) {
360 if (name.equalsIgnoreCase((String) attributes.get(i))) {
361 return (String) attributes.get(i + 1);
362 }
363 }
364 return null;
365 }
366
367 public boolean hasAttribute(String name) {
368 return getAttributeValue(name) != null;
369 }
370
371 }