Save This Page
Home » openjdk-7 » javax » swing » text » html » parser » [javadoc | source]
    1   /*
    2    * Copyright 1998-2006 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   
   26   package javax.swing.text.html.parser;
   27   
   28   import java.io.PrintStream;
   29   import java.io.File;
   30   import java.io.FileInputStream;
   31   import java.io.InputStream;
   32   import java.io.IOException;
   33   import java.io.FileNotFoundException;
   34   import java.io.BufferedInputStream;
   35   import java.io.DataInputStream;
   36   import java.util.Hashtable;
   37   import java.util.Vector;
   38   import java.util.BitSet;
   39   import java.util.StringTokenizer;
   40   import java.util.Enumeration;
   41   import java.util.Properties;
   42   import java.net.URL;
   43   
   44   /**
   45    * The representation of an SGML DTD.  DTD describes a document
   46    * syntax and is used in parsing of HTML documents.  It contains
   47    * a list of elements and their attributes as well as a list of
   48    * entities defined in the DTD.
   49    *
   50    * @see Element
   51    * @see AttributeList
   52    * @see ContentModel
   53    * @see Parser
   54    * @author Arthur van Hoff
   55    */
   56   public
   57   class DTD implements DTDConstants {
   58       public String name;
   59       public Vector<Element> elements = new Vector<Element>();
   60       public Hashtable<String,Element> elementHash
   61           = new Hashtable<String,Element>();
   62       public Hashtable<Object,Entity> entityHash
   63           = new Hashtable<Object,Entity>();
   64       public final Element pcdata = getElement("#pcdata");
   65       public final Element html = getElement("html");
   66       public final Element meta = getElement("meta");
   67       public final Element base = getElement("base");
   68       public final Element isindex = getElement("isindex");
   69       public final Element head = getElement("head");
   70       public final Element body = getElement("body");
   71       public final Element applet = getElement("applet");
   72       public final Element param = getElement("param");
   73       public final Element p = getElement("p");
   74       public final Element title = getElement("title");
   75       final Element style = getElement("style");
   76       final Element link = getElement("link");
   77       final Element script = getElement("script");
   78   
   79       public static final int FILE_VERSION = 1;
   80   
   81       /**
   82        * Creates a new DTD with the specified name.
   83        * @param name the name, as a <code>String</code> of the new DTD
   84        */
   85       protected DTD(String name) {
   86           this.name = name;
   87           defEntity("#RE", GENERAL, '\r');
   88           defEntity("#RS", GENERAL, '\n');
   89           defEntity("#SPACE", GENERAL, ' ');
   90           defineElement("unknown", EMPTY, false, true, null, null, null, null);
   91       }
   92   
   93       /**
   94        * Gets the name of the DTD.
   95        * @return the name of the DTD
   96        */
   97       public String getName() {
   98           return name;
   99       }
  100   
  101       /**
  102        * Gets an entity by name.
  103        * @return the <code>Entity</code> corresponding to the
  104        *   <code>name</code> <code>String</code>
  105        */
  106       public Entity getEntity(String name) {
  107           return (Entity)entityHash.get(name);
  108       }
  109   
  110       /**
  111        * Gets a character entity.
  112        * @return the <code>Entity</code> corresponding to the
  113        *    <code>ch</code> character
  114        */
  115       public Entity getEntity(int ch) {
  116           return (Entity)entityHash.get(Integer.valueOf(ch));
  117       }
  118   
  119       /**
  120        * Returns <code>true</code> if the element is part of the DTD,
  121        * otherwise returns <code>false</code>.
  122        *
  123        * @param  name the requested <code>String</code>
  124        * @return <code>true</code> if <code>name</code> exists as
  125        *   part of the DTD, otherwise returns <code>false</code>
  126        */
  127       boolean elementExists(String name) {
  128           return !"unknown".equals(name) && (elementHash.get(name) != null);
  129       }
  130   
  131       /**
  132        * Gets an element by name. A new element is
  133        * created if the element doesn't exist.
  134        *
  135        * @param name the requested <code>String</code>
  136        * @return the <code>Element</code> corresponding to
  137        *   <code>name</code>, which may be newly created
  138        */
  139       public Element getElement(String name) {
  140           Element e = (Element)elementHash.get(name);
  141           if (e == null) {
  142               e = new Element(name, elements.size());
  143               elements.addElement(e);
  144               elementHash.put(name, e);
  145           }
  146           return e;
  147       }
  148   
  149       /**
  150        * Gets an element by index.
  151        *
  152        * @param index the requested index
  153        * @return the <code>Element</code> corresponding to
  154        *   <code>index</code>
  155        */
  156       public Element getElement(int index) {
  157           return (Element)elements.elementAt(index);
  158       }
  159   
  160       /**
  161        * Defines an entity.  If the <code>Entity</code> specified
  162        * by <code>name</code>, <code>type</code>, and <code>data</code>
  163        * exists, it is returned; otherwise a new <code>Entity</code>
  164        * is created and is returned.
  165        *
  166        * @param name the name of the <code>Entity</code> as a <code>String</code>
  167        * @param type the type of the <code>Entity</code>
  168        * @param data the <code>Entity</code>'s data
  169        * @return the <code>Entity</code> requested or a new <code>Entity</code>
  170        *   if not found
  171        */
  172       public Entity defineEntity(String name, int type, char data[]) {
  173           Entity ent = (Entity)entityHash.get(name);
  174           if (ent == null) {
  175               ent = new Entity(name, type, data);
  176               entityHash.put(name, ent);
  177               if (((type & GENERAL) != 0) && (data.length == 1)) {
  178                   switch (type & ~GENERAL) {
  179                     case CDATA:
  180                     case SDATA:
  181                         entityHash.put(Integer.valueOf(data[0]), ent);
  182                       break;
  183                   }
  184               }
  185           }
  186           return ent;
  187       }
  188   
  189       /**
  190        * Returns the <code>Element</code> which matches the
  191        * specified parameters.  If one doesn't exist, a new
  192        * one is created and returned.
  193        *
  194        * @param name the name of the <code>Element</code>
  195        * @param type the type of the <code>Element</code>
  196        * @param omitStart <code>true</code> if start should be omitted
  197        * @param omitEnd  <code>true</code> if end should be omitted
  198        * @param content  the <code>ContentModel</code>
  199        * @param atts the <code>AttributeList</code> specifying the
  200        *    <code>Element</code>
  201        * @return the <code>Element</code> specified
  202        */
  203       public Element defineElement(String name, int type,
  204                          boolean omitStart, boolean omitEnd, ContentModel content,
  205                          BitSet exclusions, BitSet inclusions, AttributeList atts) {
  206           Element e = getElement(name);
  207           e.type = type;
  208           e.oStart = omitStart;
  209           e.oEnd = omitEnd;
  210           e.content = content;
  211           e.exclusions = exclusions;
  212           e.inclusions = inclusions;
  213           e.atts = atts;
  214           return e;
  215       }
  216   
  217       /**
  218        * Defines attributes for an {@code Element}.
  219        *
  220        * @param name the name of the <code>Element</code>
  221        * @param atts the <code>AttributeList</code> specifying the
  222        *    <code>Element</code>
  223        */
  224       public void defineAttributes(String name, AttributeList atts) {
  225           Element e = getElement(name);
  226           e.atts = atts;
  227       }
  228   
  229       /**
  230        * Creates and returns a character <code>Entity</code>.
  231        * @param name the entity's name
  232        * @return the new character <code>Entity</code>
  233        */
  234       public Entity defEntity(String name, int type, int ch) {
  235           char data[] = {(char)ch};
  236           return defineEntity(name, type, data);
  237       }
  238   
  239       /**
  240        * Creates and returns an <code>Entity</code>.
  241        * @param name the entity's name
  242        * @return the new <code>Entity</code>
  243        */
  244       protected Entity defEntity(String name, int type, String str) {
  245           int len = str.length();
  246           char data[] = new char[len];
  247           str.getChars(0, len, data, 0);
  248           return defineEntity(name, type, data);
  249       }
  250   
  251       /**
  252        * Creates and returns an <code>Element</code>.
  253        * @param name the element's name
  254        * @return the new <code>Element</code>
  255        */
  256       protected Element defElement(String name, int type,
  257                          boolean omitStart, boolean omitEnd, ContentModel content,
  258                          String[] exclusions, String[] inclusions, AttributeList atts) {
  259           BitSet excl = null;
  260           if (exclusions != null && exclusions.length > 0) {
  261               excl = new BitSet();
  262               for (int i = 0; i < exclusions.length; i++) {
  263                   String str = exclusions[i];
  264                   if (str.length() > 0) {
  265                       excl.set(getElement(str).getIndex());
  266                   }
  267               }
  268           }
  269           BitSet incl = null;
  270           if (inclusions != null && inclusions.length > 0) {
  271               incl = new BitSet();
  272               for (int i = 0; i < inclusions.length; i++) {
  273                   String str = inclusions[i];
  274                   if (str.length() > 0) {
  275                       incl.set(getElement(str).getIndex());
  276                   }
  277               }
  278           }
  279           return defineElement(name, type, omitStart, omitEnd, content, excl, incl, atts);
  280       }
  281   
  282       /**
  283        * Creates and returns an <code>AttributeList</code>.
  284        * @param name the attribute list's name
  285        * @return the new <code>AttributeList</code>
  286        */
  287       protected AttributeList defAttributeList(String name, int type, int modifier, String value, String values, AttributeList atts) {
  288           Vector vals = null;
  289           if (values != null) {
  290               vals = new Vector();
  291               for (StringTokenizer s = new StringTokenizer(values, "|") ; s.hasMoreTokens() ;) {
  292                   String str = s.nextToken();
  293                   if (str.length() > 0) {
  294                       vals.addElement(str);
  295                   }
  296               }
  297           }
  298           return new AttributeList(name, type, modifier, value, vals, atts);
  299       }
  300   
  301       /**
  302        * Creates and returns a new content model.
  303        * @param type the type of the new content model
  304        * @return the new <code>ContentModel</code>
  305        */
  306       protected ContentModel defContentModel(int type, Object obj, ContentModel next) {
  307           return new ContentModel(type, obj, next);
  308       }
  309   
  310       /**
  311        * Returns a string representation of this DTD.
  312        * @return the string representation of this DTD
  313        */
  314       public String toString() {
  315           return name;
  316       }
  317   
  318       /**
  319        * The hashtable of DTDs.
  320        */
  321       static Hashtable dtdHash = new Hashtable();
  322   
  323     public static void putDTDHash(String name, DTD dtd) {
  324       dtdHash.put(name, dtd);
  325     }
  326       /**
  327        * Returns a DTD with the specified <code>name</code>.  If
  328        * a DTD with that name doesn't exist, one is created
  329        * and returned.  Any uppercase characters in the name
  330        * are converted to lowercase.
  331        *
  332        * @param name the name of the DTD
  333        * @return the DTD which corresponds to <code>name</code>
  334        */
  335       public static DTD getDTD(String name) throws IOException {
  336           name = name.toLowerCase();
  337           DTD dtd = (DTD)dtdHash.get(name);
  338           if (dtd == null)
  339             dtd = new DTD(name);
  340   
  341           return dtd;
  342       }
  343   
  344       /**
  345        * Recreates a DTD from an archived format.
  346        * @param in  the <code>DataInputStream</code> to read from
  347        */
  348       public void read(DataInputStream in) throws IOException {
  349           if (in.readInt() != FILE_VERSION) {
  350           }
  351   
  352           //
  353           // Read the list of names
  354           //
  355           String[] names = new String[in.readShort()];
  356           for (int i = 0; i < names.length; i++) {
  357               names[i] = in.readUTF();
  358           }
  359   
  360   
  361           //
  362           // Read the entities
  363           //
  364           int num = in.readShort();
  365           for (int i = 0; i < num; i++) {
  366               short nameId = in.readShort();
  367               int type = in.readByte();
  368               String name = in.readUTF();
  369               defEntity(names[nameId], type | GENERAL, name);
  370           }
  371   
  372           // Read the elements
  373           //
  374           num = in.readShort();
  375           for (int i = 0; i < num; i++) {
  376               short nameId = in.readShort();
  377               int type = in.readByte();
  378               byte flags = in.readByte();
  379               ContentModel m = readContentModel(in, names);
  380               String[] exclusions = readNameArray(in, names);
  381               String[] inclusions = readNameArray(in, names);
  382               AttributeList atts = readAttributeList(in, names);
  383               defElement(names[nameId], type,
  384                          ((flags & 0x01) != 0), ((flags & 0x02) != 0),
  385                          m, exclusions, inclusions, atts);
  386           }
  387       }
  388   
  389       private ContentModel readContentModel(DataInputStream in, String[] names)
  390                   throws IOException {
  391           byte flag = in.readByte();
  392           switch(flag) {
  393               case 0:             // null
  394                   return null;
  395               case 1: {           // content_c
  396                   int type = in.readByte();
  397                   ContentModel m = readContentModel(in, names);
  398                   ContentModel next = readContentModel(in, names);
  399                   return defContentModel(type, m, next);
  400               }
  401               case 2: {           // content_e
  402                   int type = in.readByte();
  403                   Element el = getElement(names[in.readShort()]);
  404                   ContentModel next = readContentModel(in, names);
  405                   return defContentModel(type, el, next);
  406               }
  407           default:
  408                   throw new IOException("bad bdtd");
  409           }
  410       }
  411   
  412       private String[] readNameArray(DataInputStream in, String[] names)
  413                   throws IOException {
  414           int num = in.readShort();
  415           if (num == 0) {
  416               return null;
  417           }
  418           String[] result = new String[num];
  419           for (int i = 0; i < num; i++) {
  420               result[i] = names[in.readShort()];
  421           }
  422           return result;
  423       }
  424   
  425   
  426       private AttributeList readAttributeList(DataInputStream in, String[] names)
  427                   throws IOException  {
  428           AttributeList result = null;
  429           for (int num = in.readByte(); num > 0; --num) {
  430               short nameId = in.readShort();
  431               int type = in.readByte();
  432               int modifier = in.readByte();
  433               short valueId = in.readShort();
  434               String value = (valueId == -1) ? null : names[valueId];
  435               Vector values = null;
  436               short numValues = in.readShort();
  437               if (numValues > 0) {
  438                   values = new Vector(numValues);
  439                   for (int i = 0; i < numValues; i++) {
  440                       values.addElement(names[in.readShort()]);
  441                   }
  442               }
  443   result = new AttributeList(names[nameId], type, modifier, value,
  444                                          values, result);
  445               // We reverse the order of the linked list by doing this, but
  446               // that order isn't important.
  447           }
  448           return result;
  449       }
  450   
  451   }

Save This Page
Home » openjdk-7 » javax » swing » text » html » parser » [javadoc | source]