Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/arthurdo/parser/HtmlTag.java


1   /*
2    * Copyright (c) 1996, 2001 by Arthur Do <arthur@cs.stanford.edu>.
3    * All Rights Reserved.
4    *
5    * This program is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation; either version 2 of the License, or
8    * (at your option) any later version.
9    *
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with this program; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   */
19  
20  package com.arthurdo.parser;
21  
22  import java.util.*;
23  
24  /**
25   * HtmlTag is a helper class to store parsed tag information.
26   *
27   * @version 2.01 09/12/97
28   * @author Arthur Do <arthur@cs.stanford.edu>
29   * @see     com.arthurdo.parser.HtmlStreamTokenizer
30   */
31  public class HtmlTag
32  {
33    public HtmlTag()
34    {
35    }
36  
37    public HtmlTag(HtmlTag orig)
38    {
39      m_tag = new String(orig.m_tag);
40      m_ttype = orig.m_ttype;
41      m_endtag = orig.m_endtag;
42      m_names = (Vector)orig.m_names.clone();
43      m_values = (Vector)orig.m_values.clone();
44  //    m_params = (Hashtable)orig.m_params.clone();
45  //    m_originalParamNames = (Hashtable)orig.m_originalParamNames.clone();
46      m_empty = orig.m_empty;
47    }
48  
49    /**
50     * Sets the tag name.
51     *
52     * @param  tag  name of tag, e.g. "img"
53     * @exception  HtmlException  if malformed tag.
54     */
55    public void setTag(String tag)
56      throws HtmlException
57    {
58      try
59      {
60        m_tag = tag;
61        Object value = m_tags.get(tag.toUpperCase());
62        if (value != null)
63          m_ttype = ((Integer)value).intValue();
64      }
65      catch (StringIndexOutOfBoundsException e)
66      {
67        throw new HtmlException("invalid tag");
68      }
69    }
70  
71    /**
72     * @return  tag type, e.g. one of the <b>T_</b> constants.
73     */
74    public int getTagType()
75    {
76      return m_ttype;
77    }
78  
79    /**
80     * @return  tag name, the same name as passed to the constructor.
81     */
82    public String getTagString()
83    {
84      return m_tag;
85    }
86  
87    /**
88     * @return  this is an end tag or not, i.e. if the tag has a slash before the name.
89     */
90    public boolean isEndTag()
91    {
92      return m_endtag;
93    }
94  
95    /**
96     * Looks up a tag param name and returns the associated
97     * value, if any. Try to use the predefined <b>P_</b> constants.
98     *
99     * @param  name  name of param
100    * @return  the value associated with the name, or null.
101    */
102   public String getParam(String name)
103   {
104     final int idx = indexOfName(name);
105     if (idx != -1)
106       return (String)m_values.elementAt(idx);
107 
108     return null;
109   }
110 
111   /**
112    * Looks up a tag param name (by position)
113    *
114    * @param  i  The index of the param in the list (starting at 0).
115    * @return  The name of the indexed param
116    */
117   public String getParamName(int i)
118   {
119     return (String)m_names.elementAt(i);
120   }
121 
122   /**
123    * Looks up a tag param value (by position)
124    *
125    * @param  i  The index of the param in the list (starting at 0).
126    * @return  The value of the indexed param
127    */
128   public String getParamValue(int i)
129   {
130     return (String)m_values.elementAt(i);
131   }
132 
133   /**
134    * Looks up a tag param name and returns the associated
135    * value, if any. Try to use the predefined <b>P_</b> constants.
136    *
137    * @param  name  name of param, must be lowercase
138    * @return  the integer value associated with the name.
139    * @exception  NumberFormatException  if value is not a number.
140    */
141   public int getIntParam(String name)
142     throws NumberFormatException
143   {
144     return Integer.parseInt(getParam(name));
145   }
146 
147   /**
148    * Determines if tag has a particular parameter.
149    *
150    * @param  name  name of param, must be lowercase
151    * @return  true if tag contains parameter, false otherwise.
152    */
153   public boolean hasParam(String name)
154   {
155     return getParam(name) != null;
156   }
157 
158   /**
159    * Associates a param name with a value.
160    *
161    * @param  name  name of param
162    * @param  value  value associated with name
163    */
164   public void setParam(String name, String value)
165   {
166     m_names.addElement(name);
167     m_values.addElement(value);
168   }
169 
170   public void setWhitespace(String name, String whitespaceBefore, String whitespaceAfter)
171   {
172   }
173 
174   /**
175    * Remove association of a param name with a value.
176    *
177    * @param  name  name of param to remove
178    */
179   public void removeParam(String name)
180   {
181     final int idx = indexOfName(name);
182     if (idx != -1)
183     {
184       m_names.removeElementAt(idx);
185       m_values.removeElementAt(idx);
186     }
187   }
188 
189   /**
190    * @return  an enumeration of the parameter names.
191    */
192   public Enumeration getParamNames()
193   {
194     return m_names.elements();
195   }
196 
197   /**
198    * @return  an enumeration of the parameter values.
199    */
200   public Enumeration getParamValues()
201   {
202     return m_values.elements();
203   }
204 
205   /**
206    * @return  the number of params.
207    */
208   public int getParamCount()
209   {
210     return m_names.size();
211   }
212 
213   /**
214    * An empty tag ends with a '/'.
215    *
216    * @return  true if empty tag, false otherwise.
217    */
218   public boolean isEmpty()
219   {
220     return m_empty;
221   }
222 
223   /**
224    * @return  string representation of tag
225    */
226   public String toString()
227   {
228     StringBuffer tag = new StringBuffer();
229 
230     tag.append('<');
231     if (isEndTag())
232       tag.append(HtmlStreamTokenizer.C_ENDTAG);
233     tag.append(getTagString());
234 
235     final int size = m_names.size();
236     for (int i=0; i<size; i++)
237     {
238       String name = (String)m_names.elementAt(i);
239       tag.append(" " + name);
240       String value = (String)m_values.elementAt(i);
241       if (value.length() > 0)
242         tag.append("=\"" + value + "\"");
243     }
244     if (isEmpty())
245       tag.append(" /");
246     tag.append('>');
247 
248     return tag.toString();
249   }
250 
251   /**
252    * Reset tag to original state, as if it was just constructed.
253    */
254   public void reset()
255   {
256     m_tag = null;
257     m_ttype = T_UNKNOWN;
258     m_endtag = false;
259     m_names.removeAllElements();
260     m_values.removeAllElements();
261     m_empty = false;
262   }
263 
264 
265   public static final int T_UNKNOWN = 0;
266   public static final int T_A = 1;
267   public static final int T_ABBREV = 2;
268   public static final int T_ACRONYM = 3;
269   public static final int T_ADDRESS = 4;
270   public static final int T_APPLET = 5;
271   public static final int T_AREA = 6;
272   public static final int T_AU = 7;
273   public static final int T_B = 8;
274   public static final int T_BANNER = 9;
275   public static final int T_BASE = 10;
276   public static final int T_BASEFONT = 11;
277   public static final int T_BGSOUND = 12;
278   public static final int T_BIG = 13;
279   public static final int T_BLINK = 14;
280   public static final int T_BLOCKQUOTE = 15;
281   public static final int T_BODY = 16;
282   public static final int T_BR = 17;
283   public static final int T_CAPTION = 18;
284   public static final int T_CENTER = 19;
285   public static final int T_CITE = 20;
286   public static final int T_CODE = 21;
287   public static final int T_COL = 22;
288   public static final int T_COLGROUP = 23;
289   public static final int T_CREDIT = 24;
290   public static final int T_DD = 25;
291   public static final int T_DEL = 26;
292   public static final int T_DFN = 27;
293   public static final int T_DIR = 28;
294   public static final int T_DIV = 29;
295   public static final int T_DL = 30;
296   public static final int T_DT = 31;
297   public static final int T_EM = 32;
298   public static final int T_EMBED = 33;
299   public static final int T_FIG = 34;
300   public static final int T_FN = 35;
301   public static final int T_FONT = 36;
302   public static final int T_FORM = 37;
303   public static final int T_FRAME = 38;
304   public static final int T_FRAMESET = 39;
305   public static final int T_H1 = 40;
306   public static final int T_H2 = 41;
307   public static final int T_H3 = 42;
308   public static final int T_H4 = 43;
309   public static final int T_H5 = 44;
310   public static final int T_H6 = 45;
311   public static final int T_HEAD = 46;
312   public static final int T_HTML = 47;
313   public static final int T_HR = 48;
314   public static final int T_I = 49;
315   public static final int T_IMG = 50;
316   public static final int T_INPUT = 51;
317   public static final int T_INS = 52;
318   public static final int T_ISINDEX = 53;
319   public static final int T_KBD = 54;
320   public static final int T_LANG = 55;
321   public static final int T_LH = 56;
322   public static final int T_LI = 57;
323   public static final int T_LINK = 58;
324   public static final int T_MAP = 59;
325   public static final int T_MARQUEE = 60;
326   public static final int T_MENU = 61;
327   public static final int T_META = 62;
328   public static final int T_NEXTID = 63;
329   public static final int T_NOBR = 64;
330   public static final int T_NOEMBED = 65;
331   public static final int T_NOFRAME = 66;
332   public static final int T_NOFRAMES = 67;
333   public static final int T_NOTE = 68;
334   public static final int T_OBJECT = 69;
335   public static final int T_OL = 70;
336   public static final int T_OPTION = 71;
337   public static final int T_OVERLAY = 72;
338   public static final int T_P = 73;
339   public static final int T_PARAM = 74;
340   public static final int T_PERSON = 75;
341   public static final int T_PRE = 76;
342   public static final int T_Q = 77;
343   public static final int T_RANGE = 78;
344   public static final int T_S = 79;
345   public static final int T_SAMP = 80;
346   public static final int T_SCRIPT = 81;
347   public static final int T_SELECT = 82;
348   public static final int T_SMALL = 83;
349   public static final int T_SPOT = 84;
350   public static final int T_STRONG = 85;
351   public static final int T_STYLE = 86;
352   public static final int T_SUB = 87;
353   public static final int T_SUP = 88;
354   public static final int T_TAB = 89;
355   public static final int T_TABLE = 90;
356   public static final int T_TBODY = 91;
357   public static final int T_TD = 92;
358   public static final int T_TEXTAREA = 93;
359   public static final int T_TFOOT = 94;
360   public static final int T_TH = 95;
361   public static final int T_THEAD = 96;
362   public static final int T_TITLE = 97;
363   public static final int T_TR = 98;
364   public static final int T_TT = 99;
365   public static final int T_U = 100;
366   public static final int T_UL = 101;
367   public static final int T_VAR = 102;
368   public static final int T_WBR = 103;
369   public static final int T_IFRAME = 104;
370   /**
371    * <!DOCTYPE ...>
372    */
373   public static final int T__DOCTYPE = 105;
374 
375   public static final String P_ALIGN = new String("align");
376   public static final String P_BACKGROUND = new String("background");
377   public static final String P_BORDER = new String("border");
378   public static final String P_CHECKED = new String("checked");
379   public static final String P_CLEAR = new String("clear");
380   public static final String P_CODE = new String("code");
381   public static final String P_COLS = new String("cols");
382   public static final String P_COLSPAN = new String("colspan");
383   public static final String P_FACE = new String("face");
384   public static final String P_HEIGHT = new String("height");
385   public static final String P_HREF = new String("href");
386   public static final String P_LANGUAGE = new String("language");
387   public static final String P_LOWSRC = new String("lowsrc");
388   public static final String P_MAXLENGTH = new String("maxlength");
389   public static final String P_MULTIPLE = new String("multiple");
390   public static final String P_NAME = new String("name");
391   public static final String P_ROWS = new String("rows");
392   public static final String P_ROWSPAN = new String("rowspan");
393   public static final String P_SIZE = new String("size");
394   public static final String P_SRC = new String("src");
395   public static final String P_TARGET = new String("target");
396   public static final String P_TYPE = new String("type");
397   public static final String P_VALUE = new String("value");
398   public static final String P_VALUETYPE = new String("valuetype");
399   public static final String P_WIDTH = new String("width");
400 
401   public static final String P_CITE = new String("cite");
402   public static final String P_PROFILE = new String("profile");
403   public static final String P_ACTION = new String("action");
404   public static final String P_LONGDESC = new String("longdesc");
405   public static final String P_FOR = new String("for");
406   public static final String P_USEMAP = new String("usemap");
407   public static final String P_CODEBASE = new String("codebase");
408   public static final String P_DATA = new String("data");
409   public static final String P_ARCHIVE = new String("archive");
410   public static final String P_REL = new String("rel");
411   public static final String P_REV = new String("rev");
412 
413   //////////////////////////////////////////////////////////////////////
414 
415   /**
416    * Sets whether a tag is an end tag or not.
417    */
418   protected void setEndTag(boolean endtag)
419   {
420     m_endtag = endtag;
421   }
422 
423   /**
424    * Sets whether a tag is empty or not. An empty tag ends with a '/'.
425    */
426   protected void setEmpty(boolean empty)
427   {
428     m_empty = empty;
429   }
430 
431   private final int indexOfName(String name)
432   {
433     final int size = m_names.size();
434     for (int i=0; i<size; i++)
435       if (name.equalsIgnoreCase((String)m_names.elementAt(i)))
436         return i;
437 
438     return -1;
439   }
440 
441   private String m_tag = null;
442   private int m_ttype = T_UNKNOWN;
443   private boolean m_endtag = false;
444   private Vector m_names = new Vector();
445   private Vector m_values = new Vector();
446   private static Hashtable m_tags = new Hashtable();
447   private boolean m_empty = false;
448 
449   static
450   {
451     m_tags.put(new String("A"), new Integer(T_A));
452     m_tags.put(new String("ABBREV"), new Integer(T_ABBREV));
453     m_tags.put(new String("ACRONYM"), new Integer(T_ACRONYM));
454     m_tags.put(new String("ADDRESS"), new Integer(T_ADDRESS));
455     m_tags.put(new String("APPLET"), new Integer(T_APPLET));
456     m_tags.put(new String("AREA"), new Integer(T_AREA));
457     m_tags.put(new String("AU"), new Integer(T_AU));
458     m_tags.put(new String("B"), new Integer(T_B));
459     m_tags.put(new String("BANNER"), new Integer(T_BANNER));
460     m_tags.put(new String("BASE"), new Integer(T_BASE));
461     m_tags.put(new String("BASEFONT"), new Integer(T_BASEFONT));
462     m_tags.put(new String("BGSOUND"), new Integer(T_BGSOUND));
463     m_tags.put(new String("BIG"), new Integer(T_BIG));
464     m_tags.put(new String("BLINK"), new Integer(T_BLINK));
465     m_tags.put(new String("BLOCKQUOTE"), new Integer(T_BLOCKQUOTE));
466     m_tags.put(new String("BODY"), new Integer(T_BODY));
467     m_tags.put(new String("BR"), new Integer(T_BR));
468     m_tags.put(new String("CAPTION"), new Integer(T_CAPTION));
469     m_tags.put(new String("CENTER"), new Integer(T_CENTER));
470     m_tags.put(new String("CITE"), new Integer(T_CITE));
471     m_tags.put(new String("CODE"), new Integer(T_CODE));
472     m_tags.put(new String("COL"), new Integer(T_COL));
473     m_tags.put(new String("COLGROUP"), new Integer(T_COLGROUP));
474     m_tags.put(new String("CREDIT"), new Integer(T_CREDIT));
475     m_tags.put(new String("DD"), new Integer(T_DD));
476     m_tags.put(new String("DEL"), new Integer(T_DEL));
477     m_tags.put(new String("DFN"), new Integer(T_DFN));
478     m_tags.put(new String("DIR"), new Integer(T_DIR));
479     m_tags.put(new String("DIV"), new Integer(T_DIV));
480     m_tags.put(new String("DL"), new Integer(T_DL));
481     m_tags.put(new String("!DOCTYPE"), new Integer(T__DOCTYPE));
482     m_tags.put(new String("DT"), new Integer(T_DT));
483     m_tags.put(new String("EM"), new Integer(T_EM));
484     m_tags.put(new String("EMBED"), new Integer(T_EMBED));
485     m_tags.put(new String("FIG"), new Integer(T_FIG));
486     m_tags.put(new String("FN"), new Integer(T_FN));
487     m_tags.put(new String("FONT"), new Integer(T_FONT));
488     m_tags.put(new String("FORM"), new Integer(T_FORM));
489     m_tags.put(new String("FRAME"), new Integer(T_FRAME));
490     m_tags.put(new String("FRAMESET"), new Integer(T_FRAMESET));
491     m_tags.put(new String("H1"), new Integer(T_H1));
492     m_tags.put(new String("H2"), new Integer(T_H2));
493     m_tags.put(new String("H3"), new Integer(T_H3));
494     m_tags.put(new String("H4"), new Integer(T_H4));
495     m_tags.put(new String("H5"), new Integer(T_H5));
496     m_tags.put(new String("H6"), new Integer(T_H6));
497     m_tags.put(new String("HEAD"), new Integer(T_HEAD));
498     m_tags.put(new String("HTML"), new Integer(T_HTML));
499     m_tags.put(new String("HR"), new Integer(T_HR));
500     m_tags.put(new String("I"), new Integer(T_I));
501     m_tags.put(new String("IMG"), new Integer(T_IMG));
502     m_tags.put(new String("INPUT"), new Integer(T_INPUT));
503     m_tags.put(new String("INS"), new Integer(T_INS));
504     m_tags.put(new String("ISINDEX"), new Integer(T_ISINDEX));
505     m_tags.put(new String("KBD"), new Integer(T_KBD));
506     m_tags.put(new String("LANG"), new Integer(T_LANG));
507     m_tags.put(new String("LH"), new Integer(T_LH));
508     m_tags.put(new String("LI"), new Integer(T_LI));
509     m_tags.put(new String("LINK"), new Integer(T_LINK));
510     m_tags.put(new String("MAP"), new Integer(T_MAP));
511     m_tags.put(new String("MARQUEE"), new Integer(T_MARQUEE));
512     m_tags.put(new String("MENU"), new Integer(T_MENU));
513     m_tags.put(new String("META"), new Integer(T_META));
514     m_tags.put(new String("NEXTID"), new Integer(T_NEXTID));
515     m_tags.put(new String("NOBR"), new Integer(T_NOBR));
516     m_tags.put(new String("NOEMBED"), new Integer(T_NOEMBED));
517     m_tags.put(new String("NOFRAME"), new Integer(T_NOFRAME));
518     m_tags.put(new String("NOFRAMES"), new Integer(T_NOFRAMES));
519     m_tags.put(new String("NOTE"), new Integer(T_NOTE));
520     m_tags.put(new String("OBJECT"), new Integer(T_OBJECT));
521     m_tags.put(new String("OL"), new Integer(T_OL));
522     m_tags.put(new String("OPTION"), new Integer(T_OPTION));
523     m_tags.put(new String("OVERLAY"), new Integer(T_OVERLAY));
524     m_tags.put(new String("P"), new Integer(T_P));
525     m_tags.put(new String("PARAM"), new Integer(T_PARAM));
526     m_tags.put(new String("PERSON"), new Integer(T_PERSON));
527     m_tags.put(new String("PRE"), new Integer(T_PRE));
528     m_tags.put(new String("Q"), new Integer(T_Q));
529     m_tags.put(new String("RANGE"), new Integer(T_RANGE));
530     m_tags.put(new String("S"), new Integer(T_S));
531     m_tags.put(new String("SAMP"), new Integer(T_SAMP));
532     m_tags.put(new String("SCRIPT"), new Integer(T_SCRIPT));
533     m_tags.put(new String("SELECT"), new Integer(T_SELECT));
534     m_tags.put(new String("SMALL"), new Integer(T_SMALL));
535     m_tags.put(new String("SPOT"), new Integer(T_SPOT));
536     m_tags.put(new String("STRONG"), new Integer(T_STRONG));
537     m_tags.put(new String("STYLE"), new Integer(T_STYLE));
538     m_tags.put(new String("SUB"), new Integer(T_SUB));
539     m_tags.put(new String("SUP"), new Integer(T_SUP));
540     m_tags.put(new String("TAB"), new Integer(T_TAB));
541     m_tags.put(new String("TABLE"), new Integer(T_TABLE));
542     m_tags.put(new String("TBODY"), new Integer(T_TBODY));
543     m_tags.put(new String("TD"), new Integer(T_TD));
544     m_tags.put(new String("TEXTAREA"), new Integer(T_TEXTAREA));
545     m_tags.put(new String("TFOOT"), new Integer(T_TFOOT));
546     m_tags.put(new String("TH"), new Integer(T_TH));
547     m_tags.put(new String("THEAD"), new Integer(T_THEAD));
548     m_tags.put(new String("TITLE"), new Integer(T_TITLE));
549     m_tags.put(new String("TR"), new Integer(T_TR));
550     m_tags.put(new String("TT"), new Integer(T_TT));
551     m_tags.put(new String("U"), new Integer(T_U));
552     m_tags.put(new String("UL"), new Integer(T_UL));
553     m_tags.put(new String("VAR"), new Integer(T_VAR));
554     m_tags.put(new String("WBR"), new Integer(T_WBR));
555 
556     m_tags.put(new String("IFRAME"), new Integer(T_IFRAME));
557   }
558 }
559