Source code: cvu/html/TagToken.java
1 /*
2 * HTML Parser
3 * Copyright (C) 1997 David McNicol
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file COPYING for more details.
14 */
15
16 package cvu.html;
17
18 import java.util.Hashtable;
19 import java.util.Vector;
20 import java.util.Enumeration;
21
22 /**
23 * This represents a single HTML tag. Each TagToken has a name and a
24 * list of attributes and values.
25 * @see HTMLTokenizer
26 * @author <a href="http://www.strath.ac.uk/~ras97108/">David McNicol</a>
27 */
28 public class TagToken {
29
30 /** Identifies the escape character. */
31 public static final char ESCAPE = '\\';
32
33 /** Identifies the quotation character. */
34 public static final char QUOTE = '"';
35
36 /** Stores the name of the TagToken. */
37 private String name;
38
39 /** Indicates whether the TagToken is an end-token. */
40 private boolean end = false;
41
42 /** Stores a list of attributes and their values. */
43 private AttributeList attr;
44
45 /**
46 * Constructs a new TagToken converting the specified string
47 * into a token name and a list of attributes with values.
48 * @param line the raw data.
49 */
50 public TagToken (String line) {
51 name = null;
52 attr = new AttributeList();
53 tokenizeAttributes(line);
54 }
55
56 /**
57 * Returns the name of the TagToken.
58 */
59 public String getName () {
60 return name;
61 }
62
63 /**
64 * Returns the attribute list of the TagToken.
65 */
66 public AttributeList getAttributes () {
67 return attr;
68 }
69
70 /**
71 * Indicates whether this token is an end tag.
72 */
73 public boolean isEndTag () {
74 return end;
75 }
76
77 /**
78 * Returns true if the given attribute exists.
79 * @param name the name of the attribute.
80 */
81 public boolean isAttribute (String name) {
82 return attr.exists(name);
83 }
84
85 /**
86 * Returns the value of the specified attribute or null if the
87 * attribute does not exist.
88 * @param name the name of the attribute.
89 */
90 public String getAttribute (String name) {
91 return attr.get(name);
92 }
93
94 /**
95 * Returns an attribute with all double quote characters
96 * escaped with a backslash.
97 * @param name the name of the attribute.
98 */
99 public String getQuotedAttribute (String name) {
100
101 // Check that the attribute list is there.
102 if (attr == null) return null;
103
104 // Return the quoted version.
105 return attr.getQuoted(name);
106 }
107
108 /**
109 * Returns a string version of the attribute and its value.
110 * @param name the name of the attribute.
111 */
112 public String getAttributeToString (String name) {
113
114 // Check that the attribute list is there.
115 if (attr == null) return null;
116
117 // Return the string version.
118 return attr.toString(name);
119 }
120
121 /**
122 * Returns a string version of the TagToken.
123 */
124 public String toString () {
125
126 StringBuffer sb; // Stores the string to be returned.
127 Enumeration list; // List of node's arguments or children.
128
129 // Get a new StringBuffer.
130 sb = new StringBuffer();
131
132 // Write the opening of the tag.
133 if (end)
134 sb.append("</" + name);
135 else
136 sb.append('<' + name);
137
138 // Check if there are any attributes.
139 if (attr != null && attr.size() > 0) {
140
141 // Print string version of the attributes.
142 sb.append(' ').append(attr.toString());
143 }
144
145 // Finish off the tag.
146 sb.append('>');
147
148 // Return the string version.
149 return sb.toString();
150 }
151
152 /**
153 * Sets the name of the token and also whether it is a begin
154 * or an end token.
155 * @param name the name of the token.
156 */
157 private void setName (String name) {
158
159 if (name == null) {
160 this.name = null;
161 return;
162 }
163
164 String lcname = name.toLowerCase();
165
166 if (lcname.charAt(0) == '/') {
167 this.name = lcname.substring(1);
168 end = true;
169 } else {
170 this.name = lcname;
171 }
172 }
173
174 /**
175 * Adds a attribute and value to the list.
176 * @param name the name of the attribute.
177 * @param value the value of the attribute.
178 */
179 private void setAttribute (String name, String value) {
180 attr.set(name, value);
181 }
182
183 /**
184 * Adds a attribute to the list using the given string. The string
185 * may either be in the form 'attribute' or 'attribute=value'.
186 * @param s contains the attribute information.
187 */
188 private void setAttribute (String s) {
189
190 int idx; // The index of the = sign in the string.
191 String name; // Stores the name of the attribute.
192 String value; // Stores the value of the attribute.
193
194 // Check if the string is null.
195 if (s == null) return;
196
197 // Get the index of = within the string.
198 idx = s.indexOf('=');
199
200 // Check if there was '=' character present.
201 if (idx < 0) {
202
203 // If not, add the whole string as the attribute
204 // name with a null value.
205 setAttribute(s, "");
206 } else {
207
208 // If so, split the string into a name and value.
209
210 name = s.substring(0, idx);
211 value = s.substring(idx + 1);
212
213 // Add the name and value to the attribute list.
214 setAttribute(name, value);
215 }
216 }
217
218 /**
219 * Tokenizes the given string and uses the resulting vector
220 * to to build up the TagToken's attribute list.
221 * @param args the string to tokenize.
222 */
223 private void tokenizeAttributes (String args) {
224
225 Vector v; // Vector of tokens from the string.
226 Enumeration e; // Enumeration of vector elements.
227 String[] tokens = null; // Array of tokens from vector.
228 int length; // Size of the vector.
229 int i; // Loop variable.
230
231 // Get the vector of tokens.
232 v = tokenizeString(args);
233
234 // Check it is not null.
235 if (v == null) return;
236
237 // Create a new String array.
238 length = v.size() - 1;
239 if (length > 0) tokens = new String[length];
240
241 // Get an enumeration of the vector's elements.
242 e = v.elements();
243
244 // Store the first element as the TagToken's name.
245 setName((String) e.nextElement());
246
247 // Stop processing now if there are no more elements.
248 if (! e.hasMoreElements()) return;
249
250 // Put the rest of the elements into the string array.
251 i = 0;
252 while (e.hasMoreElements())
253 tokens[i++] = (String) e.nextElement();
254
255 // Deal with the name/value pairs with separate = signs.
256 for (i = 1; i < (length - 1); i++) {
257
258 if (tokens[i] == null) continue;
259
260 if (tokens[i].equals("=")) {
261 setAttribute(tokens[i - 1], tokens[i + 1]);
262 tokens[i] = null;
263 tokens[i - 1] = null;
264 tokens[i + 1] = null;
265 }
266 }
267
268 // Deal with lone attributes and joined name/value pairs.
269 for (i = 0; i < length; i++)
270 if (tokens[i] != null) setAttribute(tokens[i]);
271 }
272
273 /**
274 * This method tokenizes the given string and returns a vector
275 * of its constituent tokens. It understands quoting and character
276 * escapes.
277 * @param s the string to tokenize.
278 */
279 private Vector tokenizeString (String s) {
280
281 // First check that the args are not null or zero-length.
282 if (s == null || s.length() == 0) return null;
283
284 boolean whitespace = false; // True if we are reading w/space.
285 boolean escaped = false; // True if next char is escaped.
286 boolean quoted = false; // True if we are in quotes.
287 int length; // Length of attribute string.
288 int i = 0; // Loop variable.
289
290 // Create a vector to store the complete tokens.
291 Vector tokens = new Vector();
292
293 // Create a buffer to store an individual token.
294 StringBuffer buffer = new StringBuffer(80);
295
296 // Convert the String to a character array;
297 char[] array = s.toCharArray();
298
299 length = array.length;
300
301 // Loop over the character array.
302 while (i < length) {
303
304 // Check if we are currently removing whitespace.
305 if (whitespace) {
306 if (isWhitespace(array[i])) {
307 i++;
308 continue;
309 } else {
310 whitespace = false;
311 }
312 }
313
314 // Check if we are currently escaped.
315 if (escaped) {
316
317 // Add the next character to the array.
318 buffer.append(array[i++]);
319
320 // Turn off the character escape.
321 escaped = false;
322
323 continue;
324 } else {
325
326 // Check for the escape character.
327 if (array[i] == ESCAPE) {
328 escaped = true;
329 i++;
330 continue;
331 }
332
333 // Check for the quotation character.
334 if (array[i] == QUOTE) {
335 quoted = !quoted;
336 i++;
337 continue;
338 }
339
340 // Check for the end of the token.
341 if (!quoted && isWhitespace(array[i])) {
342
343 // Add the token and refresh the buffer.
344 tokens.addElement(buffer.toString());
345 buffer = new StringBuffer(80);
346
347 // Stop reading the token.
348 whitespace = true;
349
350 continue;
351 }
352
353 // Otherwise add the character to the buffer.
354 buffer.append(array[i++]);
355 }
356 }
357
358 // Add the last token to the vector if there is one.
359 if (! whitespace) tokens.addElement(buffer.toString());
360
361 return tokens;
362 }
363
364 /**
365 * Returns true if the given character is considered to be
366 * whitespace.
367 * @param c the character to test.
368 */
369 private boolean isWhitespace (char c) {
370 return (c == ' ' || c == '\t' || c == '\n');
371 }
372 }