Source code: cvu/html/HTMLNode.java
1 /*
2 * HTML Parser
3 * Copyright (C) 1997 David McNicol
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file COPYING for more details.
14 */
15
16 package cvu.html;
17
18 import java.util.Hashtable;
19 import java.util.Vector;
20 import java.util.Enumeration;
21 import java.io.DataOutputStream;
22 import java.io.IOException;
23
24 /**
25 * This class represents a single node within an HTML tree. Each node
26 * has a name, zero or more attributes and possibly some content. Nodes
27 * can appear within the content of other nodes. <p>
28 * End tags do not appear since they only indicate 'end-of-content'. To
29 * prevent the system searching for the end of standalone tags, a dynamic
30 * list has been implemented. When the HTMLNode class is resolved
31 * a setup method is called adding a set of default standalone tags
32 * to the list. Standalone tags can then be added and removed dynamically
33 * using static method calls. <p>
34 * The list is the only way the internal code can tell
35 * whether a tag is standalone. If a problem occurs the tree structure
36 * would still be sound, but it would not be accurate, so while the form
37 * of the HTML would be conserved, searches would not operate correctly.
38 * @see HTMLTree
39 * @author <a href="http://www.strath.ac.uk/~ras97108/">David McNicol</a>
40 */
41 public class HTMLNode {
42
43 private HTMLNode parent; // Refers to this node's parent.
44 private String name; // Stores the name of the HTML node.
45 private AttributeList attr; // List of element's attributes.
46 private Vector children; // Stores the HTML node's children.
47 private boolean hidden; // True if the node is not to be printed.
48
49 /**
50 * Constructs a new HTMLNode.
51 * @param tag the TagToken representing the start of this node.
52 * @param standalone true if the tag does not have any content.
53 * @param src enumeration of tag tokens.
54 */
55 public HTMLNode (TagToken tag, HTMLNode parent, Enumeration src) {
56
57 // Store the reference to the node's parent.
58 this.parent = parent;
59
60 // Set the node to be unhidden by default.
61 hidden = false;
62
63 // Check if the given tag is null.
64 if (tag != null) {
65
66 // Store the node's name.
67 name = tag.getName();
68
69 // Store the node's attribute list.
70 attr = tag.getAttributes();
71
72 // Get the node's children if needed.
73 if (HTMLNode.isStandalone(name))
74 children = null;
75 else
76 children = parseChildren(src);
77 } else {
78
79 // Otherwise, set the name and attributes to null.
80 name = null;
81 attr = null;
82
83 // Get the node's children from the enumeration.
84 children = parseChildren(src);
85 }
86 }
87
88 /**
89 * Constructs a new, detached HTMLNode with the specified name.
90 * @param name the name of the new node.
91 */
92 public HTMLNode (String name) {
93
94 // Store the name of the node.
95 this.name = name;
96
97 // The node will have no parent till it is added to a tree.
98 parent = null;
99
100 // Create a new attribute list.
101 attr = new AttributeList();
102
103 // Create space for children if the node is not standalone.
104 if (HTMLNode.isStandalone(name))
105 children = null;
106 else
107 children = new Vector();
108 }
109
110 /**
111 * Returns the name of this node.
112 */
113 public String getName () {
114 return name;
115 }
116
117 /**
118 * Returns the node's parent node.
119 */
120 public HTMLNode getParent () {
121 return parent;
122 }
123
124 /**
125 * Returns the node's children.
126 */
127 public Enumeration getChildren () {
128
129 // Return nothing if the node has any children.
130 if (children == null) return null;
131
132 return children.elements();
133 }
134
135 /**
136 * Returns true if the node is currently hidden.
137 */
138 public boolean isHidden () {
139 return hidden;
140 }
141
142 /**
143 * Hides the node.
144 */
145 public void hide () {
146 hidden = true;
147 }
148
149 /**
150 * "Unhides" the node.
151 */
152 public void unhide () {
153 hidden = false;
154 }
155
156 /**
157 * Returns the value of the attribute with the given name.
158 * @param name the name of the attribute.
159 */
160 public String getAttribute (String name) {
161
162 // Check that the attribute list is there.
163 if (attr == null) return null;
164
165 // Return the value associated with the attribute name.
166 return (String) attr.get(name);
167 }
168
169 /**
170 * Returns an enumeration of attributes defined in this node.
171 */
172 public Enumeration getAttributes () {
173
174 // Check that the attribute list has been defined.
175 if (attr == null) return null;
176
177 // Return an enumeration of all of the attribute names.
178 return attr.names();
179 }
180
181 /**
182 * Returns an attribute with all double quote characters
183 * escaped with a backslash.
184 * @param name the name of the attribute.
185 */
186 public String getQuotedAttribute (String name) {
187
188 // Check that the attribute list is there.
189 if (attr == null) return null;
190
191 // Return the quoted version.
192 return attr.getQuoted(name);
193 }
194
195 /**
196 * Returns a string version of the attribute and its value.
197 * @param name the name of the attribute.
198 */
199 public String getAttributeToString (String name) {
200
201 // Check that the attribute list is there.
202 if (attr == null) return null;
203
204 // Return the string version.
205 return attr.toString(name);
206 }
207
208 /**
209 * Returns a string version of the HTMLNode. If the node is
210 * currently hidden then return an empty string.
211 */
212 public String toString () {
213
214 StringBuffer sb; // Stores the string to be returned.
215 Enumeration list; // List of node's attributes or children.
216
217 // Get a new StringBuffer.
218 sb = new StringBuffer();
219
220 if (! hidden) {
221
222 // Write the opening of the tag.
223 sb.append('<');
224
225 // Write the tag's name.
226 sb.append(name);
227
228 // Check if there are any attributes.
229 if (attr != null && attr.size() > 0) {
230
231 // Print string version of the attributes.
232 sb.append(" " + attr);
233 }
234
235 // Finish off the tag.
236 sb.append('>');
237 }
238
239 // Return if the node is standalone.
240 if (isStandalone(name)) return sb.toString();
241
242 // Otherwise, check if the node has any children.
243 if (children != null && children.size() > 0) {
244
245 // Get a list of all of the children.
246 list = children.elements();
247
248 while (list.hasMoreElements()) {
249
250 // Get the next node from the list.
251 Object o = list.nextElement();
252
253 // Write it.
254 sb.append(o.toString());
255 }
256 }
257
258 if (! hidden) {
259 // Write the end tag.
260 sb.append("</").append(name).append(">");
261 }
262
263 // Return the string version.
264 return sb.toString();
265 }
266
267 /**
268 * Sets the node's parent to the specified HTMLNode.
269 * @param parent the new parent.
270 */
271 public void setParent (HTMLNode parent) {
272 this.parent = parent;
273 }
274
275 /**
276 * Returns true if an attribute with the given name exists.
277 * @param name the name of the attribute.
278 */
279 public boolean isAttribute (String name) {
280
281 // Check that the attribute list is there.
282 if (attr == null) return false;
283
284 // Check the table for an attribute with that name.
285 return attr.exists(name);
286 }
287
288 /**
289 * Adds a new attribute to the node's attribute list with
290 * the specified value. If the attribute already exists the
291 * old value is overwritten.
292 * @param name the name of the attribute.
293 * @param value the value of the attribute.
294 */
295 public void addAttribute (String name, String value) {
296
297 // Return if the attribute list is not there.
298 if (attr == null) return;
299
300 // Otherwise, add the name/value pair to the list.
301 attr.set(name, value);
302 }
303
304 /**
305 * Adds an object to the end of this node's content
306 * @param child the node to be added.
307 */
308 public void addChild (Object child) {
309
310 // Return if the child is invalid.
311 if (child == null) return;
312
313 // Check that this node has no children.
314 if (children == null) return;
315
316 // Add the child if it is a string.
317 if (child instanceof String) {
318
319 children.addElement(child);
320 return;
321 }
322
323 // Add the child and set its parent if it is an HTMLNode.
324 if (child instanceof HTMLNode) {
325
326 children.addElement(child);
327 ((HTMLNode) child).setParent(this);
328 return;
329 }
330 }
331
332 /**
333 * Removes the specified HTMLNode from the current node's
334 * list of children.
335 * @param child the node to be removed.
336 */
337 public void removeChild (HTMLNode child) {
338
339 // Return if the child is not defined properly
340 if (child == null) return;
341
342 // Return if the list of children is not defined properly.
343 if (children == null) return;
344
345 // Otherwise, remove the child if it is on the list.
346 children.removeElement(child);
347 }
348
349 /**
350 * Adds an object to this node's content before
351 * the specified child node.
352 * @param child the object to be added.
353 * @param before the node before which the child will be placed.
354 */
355 public void addChildBefore (Object child, HTMLNode before) {
356
357 int total; // Total number of child nodes.
358 int idx; // Index of the 'before' node.
359
360 // Return if the child is invalid.
361 if (child == null) return;
362
363 // Return if this node has no children.
364 if (children == null) return;
365
366 // Add the child at the beginning if the before node is
367 // invalid.
368 if (before == null) {
369
370 addChild(child);
371 return;
372 }
373
374 total = children.size();
375 idx = children.indexOf(before);
376
377 // Add the child to the beginning if the 'before' node
378 // was not found.
379 if (idx < 0) idx = 0;
380
381 // Return if the child is not of the right type.
382 if (! ((child instanceof String) ||
383 (child instanceof HTMLNode))) return;
384
385 // Check if the 'before' node is the last node.
386 if (idx == total - 1) {
387
388 // Add the child to the end of the list.
389 children.addElement(child);
390 } else {
391
392 // Add the child before the 'before' node.
393 children.insertElementAt(child, idx);
394 }
395
396 // If the child is an HTMLNode, set its parent.
397 if (child instanceof HTMLNode)
398 ((HTMLNode) child).setParent(this);
399 }
400
401 /**
402 * Removes an attribute with the specified name from the
403 * attribute list.
404 * @param name the name of the attribute to remove.
405 */
406 public void removeAttribute (String name) {
407
408 // Return if the attribute list is not there.
409 if (attr == null) return;
410
411 // Otherwise, remove the attribute from the list.
412 attr.unset(name);
413 }
414
415 /**
416 * Returns the node after this one in the parent's
417 * list of children.
418 */
419 public HTMLNode nextSibling () {
420
421 // Return nothing if the node has no parent.
422 if (parent == null) return null;
423
424 // Ask the parent to return the node after this one.
425 return parent.nextChild(this);
426 }
427
428 /**
429 * Returns the node before this one in the parent's
430 * list of children.
431 */
432 public HTMLNode previousSibling () {
433
434 // Return nothing if the node has no parent.
435 if (parent == null) return null;
436
437 // Ask the parent to return the node before this one.
438 return parent.previousChild(this);
439 }
440
441 /**
442 * Returns the first child of this node.
443 */
444 public HTMLNode firstChild () {
445
446 Enumeration list; // Enumeration of this node's children.
447 Object curr; // Current node from the list.
448
449 // Return nothing if this node has no children.
450 if (children == null) return null;
451
452 // Return the first child node.
453 list = children.elements();
454
455 while (list.hasMoreElements()) {
456
457 curr = list.nextElement();
458
459 // Return the first HTMLNode in the list.
460 if (curr instanceof HTMLNode)
461 return (HTMLNode) curr;
462 }
463
464 // Return nothing if there were no HTMLNodes in the list.
465 return null;
466 }
467
468 /**
469 * Returns the HTMLNode after the specified one in this
470 * nodes content.
471 * @param child the HTMLNode before the one we want.
472 */
473 public HTMLNode nextChild (HTMLNode child) {
474
475 Enumeration list; // List of this node's children.
476 Object curr; // Current object from the list.
477 boolean getNext = false; // True when child has been found.
478
479 // Return nothing if this node has no children.
480 if (children == null) return null;
481
482 // Get a list of this node's children
483 list = children.elements();
484
485 while (list.hasMoreElements()) {
486
487 curr = list.nextElement();
488
489 // Check if we have found the specified child.
490 if (getNext) {
491
492 // Return the next HTMLNode we encounter.
493 if (curr instanceof HTMLNode)
494 return (HTMLNode) curr;
495 } else {
496
497 // Check if we have found the specified child.
498 if (curr == child) getNext = true;
499 }
500 }
501
502 return null;
503 }
504
505 /**
506 * Returns the HTMLNode before the specified one in this
507 * nodes content.
508 * @param child the HTMLNode after the one we want.
509 */
510 public HTMLNode previousChild (HTMLNode child) {
511
512 Enumeration list; // List of this node's children.
513 Object curr; // Current object from the list.
514 HTMLNode prev = null; // Stores last found HTMLNode.
515 boolean returnPrev = true; // True when child has been found.
516
517 // Return nothing if this node has no children.
518 if (children == null) return null;
519
520 // Get a list of this node's children
521 list = children.elements();
522
523 while (list.hasMoreElements()) {
524
525 curr = list.nextElement();
526
527 // Check if we have found the specified child.
528 if (curr == child) return prev;
529
530 // Check if curr is an HTMLNode.
531 if (curr instanceof HTMLNode) {
532
533 // Make curr the previously found HTMLNode.
534 prev = (HTMLNode) curr;
535 }
536 }
537
538 return null;
539 }
540
541 /**
542 * Parses the contents of this HTML node from the enumeration
543 * of tokens provided.
544 * @param src an enumeration of tokens.
545 */
546 private Vector parseChildren (Enumeration src) {
547
548 // Create a new Vector to store the contents.
549 Vector store = new Vector();
550
551 // Loop round the enumeration of tokens.
552 while (src.hasMoreElements()) {
553
554 // Get the next token from the enumeration.
555 Object token = src.nextElement();
556
557 // Check if the token is simple text.
558 if (token instanceof TextToken) {
559
560 // Cast the token into type TextToken.
561 TextToken text = (TextToken) token;
562
563 // Add the text string to the vector.
564 store.addElement(text.getText());
565
566 continue;
567 }
568
569 // Check if the token is a tag.
570 if (token instanceof TagToken) {
571
572 // Cast the token into type TagToken.
573 TagToken tag = (TagToken) token;
574
575 // Check if the token is an end tag.
576 if (tag.isEndTag()) {
577
578 // Break if the end tags name matches.
579 if (name != null &&
580 name.equals(tag.getName())) break;
581
582 // Otherwise ignore the end tag.
583 continue;
584 }
585
586 // Otherwise make it into an HTMLNode.
587 HTMLNode he =
588 new HTMLNode(tag, this, src);
589
590 // Add the node to the vector.
591 store.addElement(he);
592 }
593 }
594
595 if (store.size() > 0)
596 return store;
597 else
598 return null;
599 }
600
601 /**
602 * String of default node names which are standalone.
603 */
604 private static String[] defaultStandaloneList = {
605 "area", "base", "basefont", "bgsound", "br",
606 "col", "dd", "dl", "dt", "font", "frame",
607 "hr", "img", "input", "isindex", "li",
608 "link", "meta", "nextid", "option", "overlay", "p",
609 "param", "tab", "wbr", "!", "!--"
610 };
611
612 // Full list of standalone names.
613 private static Vector standaloneList = null;
614
615 // Load the default standalones into the list after class resolution.
616 static {
617 setupStandaloneList();
618 }
619
620 /**
621 * Utility method which people can use to find out exactly
622 * which nodes are in the default standalone list. The default
623 * list is printed to the standard output.
624 */
625 public static void printDefaultStandaloneList () {
626 System.out.println(defaultStandaloneList);
627 }
628
629 /**
630 * Adds the specified string to the standalone list.
631 * @param name the new standalone name.
632 */
633 public static void addStandalone (String name) {
634
635 // Check if the list has been initialized first.
636 if (standaloneList == null) return;
637
638 // Convert the String to lower case.
639 String lc = name.toLowerCase();
640
641 // Check that the list does not have the String already.
642 if (standaloneList.contains(lc)) return;
643
644 // Otherwise add the lowercase string to the list.
645 standaloneList.addElement(lc);
646 }
647
648 /**
649 * Removes the specified string from the standalone list.
650 * @param name the standalone name to remove.
651 */
652 public static void removeStandalone (String name) {
653
654 // Check if the standaloneList has been initialized first.
655 if (standaloneList == null) return;
656
657 // Convert the String to lower case.
658 String lc = name.toLowerCase();
659
660 // Remove the lowercase string from the list.
661 standaloneList.removeElement(lc);
662 }
663
664 /**
665 * Checks the standalone list to see if it mentions the specified
666 * tag name and returns true if so.
667 * @param name the tag name to check against the list.
668 */
669 public static boolean isStandalone (String name) {
670
671 // Check if the standaloneList has been initialized first.
672 if (standaloneList == null) return true;
673
674 // Otherwise check the list to see if it contains the tag name.
675 return standaloneList.contains(name);
676 }
677
678 /**
679 * Sets up the standalone vector at runtime using the list of
680 * default standalone tags. New standalone tags can then be added
681 * to the vector. <p>
682 * This method will only be executed once, since it is guarded
683 * by a private boolean variable.
684 */
685 private static void setupStandaloneList () {
686
687 // Create a new vector to store the defaults.
688 standaloneList = new Vector(defaultStandaloneList.length);
689
690 // Add all of the strings in the default list.
691 for (int i = 0; i < defaultStandaloneList.length; i++)
692 standaloneList.addElement(defaultStandaloneList[i]);
693 }
694 }