Source code: com/hexidec/ekit/component/parser/DTD.java
1 /*
2 * @(#)DTD.java 1.14 01/12/03
3 *
4 * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
5 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6 */
7
8 package com.hexidec.ekit.component.parser;
9
10 import java.io.PrintStream;
11 import java.io.File;
12 import java.io.FileInputStream;
13 import java.io.InputStream;
14 import java.io.IOException;
15 import java.io.FileNotFoundException;
16 import java.io.BufferedInputStream;
17 import java.io.DataInputStream;
18 import java.util.Hashtable;
19 import java.util.Vector;
20 import java.util.BitSet;
21 import java.util.StringTokenizer;
22 import java.util.Enumeration;
23 import java.util.Properties;
24 import java.net.URL;
25
26 /**
27 * The representation of an SGML DTD. DTD describes a document
28 * syntax and is used in parsing of HTML documents. It contains
29 * a list of elements and their attributes as well as a list of
30 * entities defined in the DTD.
31 *
32 * @see Element
33 * @see AttributeList
34 * @see ContentModel
35 * @see Parser
36 * @author Arthur van Hoff
37 * @version 1.14 12/03/01
38 */
39 public
40 class DTD implements DTDConstants {
41 public String name;
42 public Vector elements = new Vector();
43 public Hashtable elementHash = new Hashtable();
44 public Hashtable entityHash = new Hashtable();
45 public final Element pcdata = getElement("#pcdata");
46 public final Element html = getElement("html");
47 public final Element meta = getElement("meta");
48 public final Element base = getElement("base");
49 public final Element isindex = getElement("isindex");
50 public final Element head = getElement("head");
51 public final Element body = getElement("body");
52 public final Element applet = getElement("applet");
53 public final Element param = getElement("param");
54 public final Element p = getElement("p");
55
56
57 // Added to default, temporary
58 public final Element font = getElement("font");
59 public final Element h1 = getElement("h1");
60 public final Element h2 = getElement("h2");
61 public final Element h3 = getElement("h3");
62 public final Element h4 = getElement("h4");
63 public final Element h5 = getElement("h5");
64 public final Element h6 = getElement("h6");
65 public final Element div = getElement("div");
66 public final Element br = getElement("br");
67 public final Element hr = getElement("hr");
68 public final Element u = getElement("u");
69 public final Element a = getElement("a");
70 public final Element b = getElement("b");
71 public final Element i = getElement("i");
72 public final Element sup = getElement("sup");
73 public final Element sub = getElement("sub");
74 public final Element strike = getElement("strike");
75 public final Element pre = getElement("pre");
76 public final Element big = getElement("big");
77 public final Element small = getElement("small");
78
79 public final Element title = getElement("title");
80 final Element style = getElement("style");
81 final Element link = getElement("link");
82
83 public static int FILE_VERSION = 1;
84
85 /**
86 * Creates a new DTD with the specified name.
87 * @param name the name, as a <code>String</code> of the new DTD
88 */
89 protected DTD(String name) {
90 this.name = name;
91 defEntity("#RE", GENERAL, '\r');
92 defEntity("#RS", GENERAL, '\n');
93 defEntity("#SPACE", GENERAL, ' ');
94 defineElement("unknown", EMPTY, false, true, null, null, null, null);
95 /*
96 //Define Possible General Entity references
97 defEntity("amp",GENERAL,'&');
98 defEntity("lt",GENERAL,'<');
99 defEntity("gt",GENERAL,'>');
100 defEntity("quot",GENERAL,'\"');
101 defEntity("apos",GENERAL,'\'');
102
103 //Imported Names
104 defEntity("ContentType",PARAMETER,"CDATA");
105 defEntity("ContentTypes",PARAMETER,"CDATA");
106 defEntity("Charset",PARAMETER,"CDATA");
107 defEntity("Charsets",PARAMETER,"CDATA");
108 defEntity("LanguageCode",PARAMETER,"NMTOKEN");
109 defEntity("Character",PARAMETER,"CDATA");
110 defEntity("Number",PARAMETER,"CDATA");
111 defEntity("LinkTypes",PARAMETER,"CDATA");
112 defEntity("MediaDesc",PARAMETER,"CDATA");
113 defEntity("URI",PARAMETER,"CDATA");
114 defEntity("UriList",PARAMETER,"CDATA");
115 defEntity("Datetime",PARAMETER,"CDATA");
116 defEntity("Script",PARAMETER,"CDATA");
117 defEntity("StyleSheet",PARAMETER,"CDATA");
118 defEntity("Text",PARAMETER,"CDATA");
119 defEntity("FrameTarget",PARAMETER,"NMTOKEN");
120 defEntity("Length",PARAMETER,"CDATA");
121 defEntity("MultiLength",PARAMETER,"CDATA");
122 defEntity("Pixels",PARAMETER,"CDATA");
123 defEntity("StyleSheet",PARAMETER,"CDATA");
124 defEntity("Text",PARAMETER,"CDATA");
125
126 //Image maps
127 defEntity("Shape",PARAMETER,"(rect|circle|poly|default)");
128 defEntity("Coords",PARAMETER,"CDATA");
129 defEntity("ImgAlign",PARAMETER,"(top|middle|bottom|left|right)");
130 defEntity("Color",PARAMETER,"CDATA");
131 */
132
133
134 }
135
136 /**
137 * Gets the name of the DTD.
138 * @return the name of the DTD
139 */
140 public String getName() {
141 return name;
142 }
143
144 /**
145 * Gets an entity by name.
146 * @return the <code>Entity</code> corresponding to the
147 * <code>name</code> <code>String</code>
148 */
149 public Entity getEntity(String name) {
150 return (Entity)entityHash.get(name);
151 }
152
153 /**
154 * Gets a character entity.
155 * @return the <code>Entity</code> corresponding to the
156 * <code>ch</code> character
157 */
158 public Entity getEntity(int ch) {
159 return (Entity)entityHash.get(new Integer(ch));
160 }
161
162 /**
163 * Returns <code>true</code> if the element is part of the DTD,
164 * otherwise returns <code>false</code>.
165 *
166 * @param name the requested <code>String</code>
167 * @return <code>true</code> if <code>name</code> exists as
168 * part of the DTD, otherwise returns <code>false</code>
169 */
170 boolean elementExists(String name) {
171 Element e = (Element)elementHash.get(name);
172 return ((e == null) ? false : true);
173 }
174
175 /**
176 * Gets an element by name. A new element is
177 * created if the element doesn't exist.
178 *
179 * @param name the requested <code>String</code>
180 * @return the <code>Element</code> corresponding to
181 * <code>name</code>, which may be newly created
182 */
183 public Element getElement(String name) {
184 Element e = (Element)elementHash.get(name);
185 if (e == null) {
186 e = new Element(name, elements.size());
187 elements.addElement(e);
188 elementHash.put(name, e);
189 }
190 return e;
191 }
192
193 /**
194 * Gets an element by index.
195 *
196 * @param index the requested index
197 * @return the <code>Element</code> corresponding to
198 * <code>index</code>
199 */
200 public Element getElement(int index) {
201 return (Element)elements.elementAt(index);
202 }
203
204 /**
205 * Defines an entity. If the <code>Entity</code> specified
206 * by <code>name</code>, <code>type</code>, and <code>data</code>
207 * exists, it is returned; otherwise a new <code>Entity</code>
208 * is created and is returned.
209 *
210 * @param name the name of the <code>Entity</code> as a <code>String</code>
211 * @param type the type of the <code>Entity</code>
212 * @param data the <code>Entity</code>'s data
213 * @return the <code>Entity</code> requested or a new <code>Entity</code>
214 * if not found
215 */
216 public Entity defineEntity(String name, int type, char data[]) {
217 Entity ent = (Entity)entityHash.get(name);
218 if (ent == null) {
219 ent = new Entity(name, type, data);
220 entityHash.put(name, ent);
221 if (((type & GENERAL) != 0) && (data.length == 1)) {
222 switch (type & ~GENERAL) {
223 case CDATA:
224 case SDATA:
225 entityHash.put(new Integer(data[0]), ent);
226 break;
227 }
228 }
229 }
230 return ent;
231 }
232
233 /**
234 * Returns the <code>Element</code> which matches the
235 * specified parameters. If one doesn't exist, a new
236 * one is created and returned.
237 *
238 * @param name the name of the <code>Element</code>
239 * @param type the type of the <code>Element</code>
240 * @param omitStart <code>true</code if start should be omitted
241 * @param omitEnd <code>true</code> if end should be omitted
242 * @param content the <code>ContentModel</code>
243 * @param atts the <code>AttributeList</code> specifying the
244 * <code>Element</code>
245 * @return the <code>Element</code> specified
246 */
247 public Element defineElement(String name, int type,
248 boolean omitStart, boolean omitEnd, ContentModel content,
249 BitSet exclusions, BitSet inclusions, AttributeList atts) {
250 Element e = getElement(name);
251 e.type = type;
252 e.oStart = omitStart;
253 e.oEnd = omitEnd;
254 e.content = content;
255 e.exclusions = exclusions;
256 e.inclusions = inclusions;
257 e.atts = atts;
258 return e;
259 }
260
261 /**
262 * Returns the <code>Element</code> which matches the
263 * specified <code>AttributeList</code>.
264 * If one doesn't exist, a new one is created and returned.
265 *
266 * @param name the name of the <code>Element</code>
267 * @param atts the <code>AttributeList</code> specifying the
268 * <code>Element</code>
269 * @return the <code>Element</code> specified
270 */
271 public void defineAttributes(String name, AttributeList atts) {
272 Element e = getElement(name);
273 e.atts = atts;
274 }
275
276 /**
277 * Creates and returns a character <code>Entity</code>.
278 * @param name the entity's name
279 * @return the new character <code>Entity</code>
280 */
281 public Entity defEntity(String name, int type, int ch) {
282 char data[] = {(char)ch};
283 return defineEntity(name, type, data);
284 }
285
286 /**
287 * Creates and returns an <code>Entity</code>.
288 * @param name the entity's name
289 * @return the new <code>Entity</code>
290 */
291 protected Entity defEntity(String name, int type, String str) {
292 int len = str.length();
293 char data[] = new char[len];
294 str.getChars(0, len, data, 0);
295 return defineEntity(name, type, data);
296 }
297
298 /**
299 * Creates and returns an <code>Element</code>.
300 * @param the element's name
301 * @return the new <code>Element</code>
302 */
303 protected Element defElement(String name, int type,
304 boolean omitStart, boolean omitEnd, ContentModel content,
305 String[] exclusions, String[] inclusions, AttributeList atts) {
306 BitSet excl = null;
307 if (exclusions != null && exclusions.length > 0) {
308 excl = new BitSet();
309 for (int i = 0; i < exclusions.length; i++) {
310 String str = exclusions[i];
311 if (str.length() > 0) {
312 excl.set(getElement(str).getIndex());
313 }
314 }
315 }
316 BitSet incl = null;
317 if (inclusions != null && inclusions.length > 0) {
318 incl = new BitSet();
319 for (int i = 0; i < inclusions.length; i++) {
320 String str = inclusions[i];
321 if (str.length() > 0) {
322 incl.set(getElement(str).getIndex());
323 }
324 }
325 }
326 return defineElement(name, type, omitStart, omitEnd, content, excl, incl, atts);
327 }
328
329 /**
330 * Creates and returns an <code>AttributeList</code>.
331 * @param name the attribute list's name
332 * @return the new <code>AttributeList</code>
333 */
334 protected AttributeList defAttributeList(String name, int type, int modifier, String value, String values, AttributeList atts) {
335 Vector vals = null;
336 if (values != null) {
337 vals = new Vector();
338 for (StringTokenizer s = new StringTokenizer(values, "|") ; s.hasMoreTokens() ;) {
339 String str = s.nextToken();
340 if (str.length() > 0) {
341 vals.addElement(str);
342 }
343 }
344 }
345 return new AttributeList(name, type, modifier, value, vals, atts);
346 }
347
348 /**
349 * Creates and returns a new content model.
350 * @param type the type of the new content model
351 * @return the new <code>ContentModel</code>
352 */
353 protected ContentModel defContentModel(int type, Object obj, ContentModel next) {
354 return new ContentModel(type, obj, next);
355 }
356
357 /**
358 * Returns a string representation of this DTD.
359 * @return the string representation of this DTD
360 */
361 public String toString() {
362 return name;
363 }
364
365 /**
366 * The hashtable of DTDs.
367 */
368 static Hashtable dtdHash = new Hashtable();
369
370 public static void putDTDHash(String name, DTD dtd) {
371 dtdHash.put(name, dtd);
372 }
373 /**
374 * Returns a DTD with the specified <code>name</code>. If
375 * a DTD with that name doesn't exist, one is created
376 * and returned. Any uppercase characters in the name
377 * are converted to lowercase.
378 *
379 * @param the name of the DTD
380 * @return the DTD which corresponds to <code>name</code>
381 */
382 public static DTD getDTD(String name) throws IOException {
383 name = name.toLowerCase();
384 DTD dtd = (DTD)dtdHash.get(name);
385 if (dtd == null)
386 dtd = new DTD(name);
387
388 return dtd;
389 }
390
391 /**
392 * Recreates a DTD from an archived format.
393 * @param in the <code>DataInputStream</code> to read from
394 */
395 public void read(DataInputStream in) throws IOException {
396 if (in.readInt() != FILE_VERSION) {
397 }
398
399 //
400 // Read the list of names
401 //
402 String[] names = new String[in.readShort()];
403 for (int i = 0; i < names.length; i++) {
404 names[i] = in.readUTF();
405 }
406
407
408 //
409 // Read the entities
410 //
411 int num = in.readShort();
412 for (int i = 0; i < num; i++) {
413 short nameId = in.readShort();
414 int type = in.readByte();
415 String name = in.readUTF();
416 defEntity(names[nameId], type | GENERAL, name);
417 }
418
419 // Read the elements
420 //
421 num = in.readShort();
422 for (int i = 0; i < num; i++) {
423 short nameId = in.readShort();
424 int type = in.readByte();
425 byte flags = in.readByte();
426 ContentModel m = readContentModel(in, names);
427 String[] exclusions = readNameArray(in, names);
428 String[] inclusions = readNameArray(in, names);
429 AttributeList atts = readAttributeList(in, names);
430 defElement(names[nameId], type,
431 ((flags & 0x01) != 0), ((flags & 0x02) != 0),
432 m, exclusions, inclusions, atts);
433 }
434 }
435
436 private ContentModel readContentModel(DataInputStream in, String[] names)
437 throws IOException {
438 byte flag = in.readByte();
439 switch(flag) {
440 case 0: // null
441 return null;
442 case 1: { // content_c
443 int type = in.readByte();
444 ContentModel m = readContentModel(in, names);
445 ContentModel next = readContentModel(in, names);
446 return defContentModel(type, m, next);
447 }
448 case 2: { // content_e
449 int type = in.readByte();
450 Element el = getElement(names[in.readShort()]);
451 ContentModel next = readContentModel(in, names);
452 return defContentModel(type, el, next);
453 }
454 default:
455 throw new IOException("bad bdtd");
456 }
457 }
458
459 private String[] readNameArray(DataInputStream in, String[] names)
460 throws IOException {
461 int num = in.readShort();
462 if (num == 0) {
463 return null;
464 }
465 String[] result = new String[num];
466 for (int i = 0; i < num; i++) {
467 result[i] = names[in.readShort()];
468 }
469 return result;
470 }
471
472
473 private AttributeList readAttributeList(DataInputStream in, String[] names)
474 throws IOException {
475 AttributeList result = null;
476 for (int num = in.readByte(); num > 0; --num) {
477 short nameId = in.readShort();
478 int type = in.readByte();
479 int modifier = in.readByte();
480 short valueId = in.readShort();
481 String value = (valueId == -1) ? null : names[valueId];
482 Vector values = null;
483 short numValues = in.readShort();
484 if (numValues > 0) {
485 values = new Vector(numValues);
486 for (int i = 0; i < numValues; i++) {
487 values.addElement(names[in.readShort()]);
488 }
489 }
490 result = new AttributeList(names[nameId], type, modifier, value,
491 values, result);
492 // We reverse the order of the linked list by doing this, but
493 // that order isn't important.
494 }
495 return result;
496 }
497
498 }