Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: cvu/html/TagToken.java


1   /*
2    * HTML Parser
3    * Copyright (C) 1997 David McNicol
4    *
5    * This program is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation; either version 2 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13   * file COPYING for more details.
14   */
15  
16  package cvu.html;
17  
18  import java.util.Hashtable;
19  import java.util.Vector;
20  import java.util.Enumeration;
21  
22  /**
23   * This represents a single HTML tag. Each TagToken has a name and a
24   * list of attributes and values.
25   * @see HTMLTokenizer
26   * @author <a href="http://www.strath.ac.uk/~ras97108/">David McNicol</a>
27   */
28  public class TagToken {
29  
30    /** Identifies the escape character. */
31    public static final char ESCAPE = '\\';
32  
33    /** Identifies the quotation character. */
34    public static final char QUOTE = '"';
35  
36    /** Stores the name of the TagToken. */
37    private String name;
38    
39    /** Indicates whether the TagToken is an end-token. */
40    private boolean end = false;
41  
42    /** Stores a list of attributes and their values. */
43    private AttributeList attr;
44  
45    /**
46     * Constructs a new TagToken converting the specified string
47     * into a token name and a list of attributes with values.
48     * @param line the raw data.
49     */
50    public TagToken (String line) {
51      name = null;
52      attr = new AttributeList();
53      tokenizeAttributes(line);
54    }
55  
56    /**
57     * Returns the name of the TagToken.
58     */
59    public String getName () {
60      return name;
61    }
62  
63    /**
64     * Returns the attribute list of the TagToken.
65     */
66    public AttributeList getAttributes () {
67      return attr;
68    }
69  
70    /**
71     * Indicates whether this token is an end tag.
72     */
73    public boolean isEndTag () {
74      return end;
75    }
76  
77    /**
78     * Returns true if the given attribute exists.
79     * @param name the name of the attribute.
80     */
81    public boolean isAttribute (String name) {
82      return attr.exists(name);
83    }
84  
85    /**
86     * Returns the value of the specified attribute or null if the
87     * attribute does not exist.
88     * @param name the name of the attribute.
89     */
90    public String getAttribute (String name) {
91      return attr.get(name);
92    }
93  
94    /**
95     * Returns an attribute with all double quote characters
96     * escaped with a backslash.
97     * @param name the name of the attribute.
98     */
99    public String getQuotedAttribute (String name) {
100 
101     // Check that the attribute list is there.
102     if (attr == null) return null;
103 
104     // Return the quoted version.
105     return attr.getQuoted(name);
106   }
107 
108   /**
109    * Returns a string version of the attribute and its value.
110    * @param name the name of the attribute.
111    */
112   public String getAttributeToString (String name) {
113 
114     // Check that the attribute list is there.
115     if (attr == null) return null;
116 
117     // Return the string version.
118     return attr.toString(name);
119   }
120 
121   /**
122    * Returns a string version of the TagToken.
123    */
124   public String toString () {
125 
126     StringBuffer sb;  // Stores the string to be returned.
127     Enumeration list; // List of node's arguments or children.
128 
129     // Get a new StringBuffer.
130     sb = new StringBuffer();
131 
132     // Write the opening of the tag.
133     if (end)
134       sb.append("</" + name);
135     else
136       sb.append('<' + name);
137 
138     // Check if there are any attributes.
139     if (attr != null && attr.size() > 0) {
140 
141       // Print string version of the attributes.
142       sb.append(' ').append(attr.toString());
143     }
144 
145     // Finish off the tag.
146     sb.append('>');
147 
148     // Return the string version.
149     return sb.toString();
150   }
151 
152   /**
153    * Sets the name of the token and also whether it is a begin
154    * or an end token.
155    * @param name the name of the token.
156    */
157   private void setName (String name) {
158 
159     if (name == null) {
160       this.name = null;
161       return;
162     }
163 
164     String lcname = name.toLowerCase();
165 
166     if (lcname.charAt(0) == '/') {
167       this.name = lcname.substring(1);
168       end = true;
169     } else {
170       this.name = lcname;
171     }
172   }
173 
174   /**
175    * Adds a attribute and value to the list. 
176    * @param name the name of the attribute.
177    * @param value the value of the attribute.
178    */
179   private void setAttribute (String name, String value) {
180     attr.set(name, value);
181   }
182 
183   /**
184    * Adds a attribute to the list using the given string. The string
185    * may either be in the form 'attribute' or 'attribute=value'.
186    * @param s contains the attribute information.
187     */
188   private void setAttribute (String s) {
189 
190     int idx;  // The index of the = sign in the string.
191     String name;  // Stores the name of the attribute.
192     String value;  // Stores the value of the attribute.
193 
194     // Check if the string is null.
195     if (s == null) return; 
196 
197     // Get the index of = within the string.
198     idx = s.indexOf('=');
199 
200     // Check if there was '=' character present.
201     if (idx < 0) {
202 
203       // If not, add the whole string as the attribute
204       // name with a null value.
205       setAttribute(s, "");
206     } else {
207 
208       // If so, split the string into a name and value.
209 
210       name = s.substring(0, idx);
211       value = s.substring(idx + 1);
212     
213       // Add the name and value to the attribute list.
214       setAttribute(name, value);
215     }
216   }
217 
218   /**
219    * Tokenizes the given string and uses the resulting vector
220    * to to build up the TagToken's attribute list.
221    * @param args the string to tokenize.
222    */
223   private void tokenizeAttributes (String args) {
224 
225     Vector v;    // Vector of tokens from the string.
226     Enumeration e;    // Enumeration of vector elements.
227     String[] tokens = null;  // Array of tokens from vector.
228     int length;    // Size of the vector.
229     int i;      // Loop variable.
230 
231     // Get the vector of tokens.
232     v = tokenizeString(args);
233 
234     // Check it is not null.
235     if (v == null) return;
236 
237     // Create a new String array.
238     length = v.size() - 1;
239     if (length > 0) tokens = new String[length];
240 
241     // Get an enumeration of the vector's elements.
242     e = v.elements();
243 
244     // Store the first element as the TagToken's name.
245     setName((String) e.nextElement());
246 
247     // Stop processing now if there are no more elements.
248     if (! e.hasMoreElements()) return;
249 
250     // Put the rest of the elements into the string array.
251     i = 0;
252     while (e.hasMoreElements())
253       tokens[i++] = (String) e.nextElement();
254     
255     // Deal with the name/value pairs with separate = signs.
256     for (i = 1; i < (length - 1); i++) {
257 
258       if (tokens[i] == null) continue;
259 
260       if (tokens[i].equals("=")) {
261         setAttribute(tokens[i - 1], tokens[i + 1]);
262         tokens[i] = null;
263         tokens[i - 1] = null;
264         tokens[i + 1] = null;
265       }
266     }
267 
268     // Deal with lone attributes and joined name/value pairs.
269     for (i = 0; i < length; i++)
270       if (tokens[i] != null) setAttribute(tokens[i]);
271   }
272 
273   /**
274    * This method tokenizes the given string and returns a vector
275    * of its constituent tokens. It understands quoting and character
276    * escapes.
277    * @param s the string to tokenize.
278    */
279   private Vector tokenizeString (String s) {
280 
281     // First check that the args are not null or zero-length.
282     if (s == null || s.length() == 0) return null;
283 
284     boolean whitespace = false; // True if we are reading w/space.
285     boolean escaped = false;    // True if next char is escaped.
286     boolean quoted = false;      // True if we are in quotes.
287     int length;        // Length of attribute string.
288     int i = 0;        // Loop variable.
289 
290     // Create a vector to store the complete tokens.
291     Vector tokens = new Vector();
292 
293     // Create a buffer to store an individual token.
294     StringBuffer buffer = new StringBuffer(80);
295 
296     // Convert the String to a character array;
297     char[] array = s.toCharArray();
298 
299     length = array.length;
300 
301     // Loop over the character array.
302     while (i < length) {
303 
304       // Check if we are currently removing whitespace.
305       if (whitespace) {
306         if (isWhitespace(array[i])) {
307           i++;
308           continue;
309         } else {
310           whitespace = false;
311         }
312       }
313 
314       // Check if we are currently escaped.
315       if (escaped) {
316 
317         // Add the next character to the array.
318         buffer.append(array[i++]);
319 
320         // Turn off the character escape.
321         escaped = false;
322 
323         continue;
324       } else {
325 
326         // Check for the escape character.
327         if (array[i] == ESCAPE) {
328           escaped = true;
329           i++;
330           continue;
331         }
332 
333         // Check for the quotation character.
334         if (array[i] == QUOTE) {
335           quoted = !quoted;
336           i++;
337           continue;
338         }
339 
340         // Check for the end of the token.
341         if (!quoted && isWhitespace(array[i])) {
342 
343           // Add the token and refresh the buffer.
344           tokens.addElement(buffer.toString());
345           buffer = new StringBuffer(80);
346 
347           // Stop reading the token.
348           whitespace = true;
349 
350           continue;
351         }
352 
353         // Otherwise add the character to the buffer.
354         buffer.append(array[i++]);
355       }
356     }
357 
358     // Add the last token to the vector if there is one.
359     if (! whitespace) tokens.addElement(buffer.toString());
360 
361     return tokens;
362   }
363 
364   /**
365    * Returns true if the given character is considered to be
366    * whitespace.
367    * @param c the character to test.
368    */
369   private boolean isWhitespace (char c) {
370     return (c == ' ' || c == '\t' || c == '\n');
371   }
372 }