Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl;
   19   
   20   import java.io.CharConversionException;
   21   import java.io.EOFException;
   22   import java.io.IOException;
   23   
   24   import org.apache.xerces.impl.io.MalformedByteSequenceException;
   25   import org.apache.xerces.impl.msg.XMLMessageFormatter;
   26   import org.apache.xerces.util.AugmentationsImpl;
   27   import org.apache.xerces.util.XMLAttributesImpl;
   28   import org.apache.xerces.util.XMLChar;
   29   import org.apache.xerces.util.XMLStringBuffer;
   30   import org.apache.xerces.util.XMLSymbols;
   31   import org.apache.xerces.xni.Augmentations;
   32   import org.apache.xerces.xni.QName;
   33   import org.apache.xerces.xni.XMLAttributes;
   34   import org.apache.xerces.xni.XMLDocumentHandler;
   35   import org.apache.xerces.xni.XMLResourceIdentifier;
   36   import org.apache.xerces.xni.XMLString;
   37   import org.apache.xerces.xni.XNIException;
   38   import org.apache.xerces.xni.parser.XMLComponent;
   39   import org.apache.xerces.xni.parser.XMLComponentManager;
   40   import org.apache.xerces.xni.parser.XMLConfigurationException;
   41   import org.apache.xerces.xni.parser.XMLDocumentScanner;
   42   import org.apache.xerces.xni.parser.XMLInputSource;
   43   
   44   /**
   45    * This class is responsible for scanning the structure and content
   46    * of document fragments. The scanner acts as the source for the 
   47    * document information which is communicated to the document handler.
   48    * <p>
   49    * This component requires the following features and properties from the
   50    * component manager that uses it:
   51    * <ul>
   52    *  <li>http://xml.org/sax/features/validation</li>
   53    *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
   54    *  <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
   55    *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
   56    *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
   57    *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
   58    * </ul>
   59    * 
   60    * @xerces.internal
   61    *
   62    * @author Glenn Marcy, IBM
   63    * @author Andy Clark, IBM
   64    * @author Arnaud  Le Hors, IBM
   65    * @author Eric Ye, IBM
   66    *
   67    * @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
   68    */
   69   public class XMLDocumentFragmentScannerImpl
   70       extends XMLScanner
   71       implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
   72   
   73       //
   74       // Constants
   75       //
   76   
   77       // scanner states (int codes handed to setScannerState; the values declared here are not contiguous)
   78   
   79       /** Scanner state: start of markup. */
   80       protected static final int SCANNER_STATE_START_OF_MARKUP = 1;
   81   
   82       /** Scanner state: comment. */
   83       protected static final int SCANNER_STATE_COMMENT = 2;
   84   
   85       /** Scanner state: processing instruction. */
   86       protected static final int SCANNER_STATE_PI = 3;
   87   
   88       /** Scanner state: DOCTYPE. */
   89       protected static final int SCANNER_STATE_DOCTYPE = 4;
   90   
   91       /** Scanner state: root element. */
   92       protected static final int SCANNER_STATE_ROOT_ELEMENT = 6;
   93   
   94       /** Scanner state: content. This is the state that reset() installs. */
   95       protected static final int SCANNER_STATE_CONTENT = 7;
   96   
   97       /** Scanner state: reference. */
   98       protected static final int SCANNER_STATE_REFERENCE = 8;
   99   
  100       /** Scanner state: end of input. */
  101       protected static final int SCANNER_STATE_END_OF_INPUT = 13;
  102   
  103       /** Scanner state: terminated. */
  104       protected static final int SCANNER_STATE_TERMINATED = 14;
  105   
  106       /** Scanner state: CDATA section. */
  107       protected static final int SCANNER_STATE_CDATA = 15;
  108   
  109       /** Scanner state: text declaration. */
  110       protected static final int SCANNER_STATE_TEXT_DECL = 16;
  111   
  112       // feature identifiers (each is a prefix from Constants followed by a feature suffix)
  113   
  114       /** Feature identifier: namespaces. */
  115       protected static final String NAMESPACES = 
  116           Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
  117   
  118       /** Feature identifier: notify built-in references. Queried in reset() and tracked by setFeature(). */
  119       protected static final String NOTIFY_BUILTIN_REFS =
  120           Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
  121           
  122       // property identifiers
  123       
  124       /** Property identifier: entity resolver. Its value is only retained when it is an ExternalSubsetResolver. */
  125       protected static final String ENTITY_RESOLVER =
  126           Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
  127       
  128       // recognized features and properties (each defaults array is index-aligned with its identifier array)
  129   
  130       /** Recognized features. getRecognizedFeatures() hands out a clone of this array. */
  131       private static final String[] RECOGNIZED_FEATURES = {
  132           NAMESPACES, 
  133           VALIDATION, 
  134           NOTIFY_BUILTIN_REFS,
  135           NOTIFY_CHAR_REFS, 
  136       };
  137   
  138       /** Feature defaults, looked up by getFeatureDefault(); a null entry means no default is reported. */
  139       private static final Boolean[] FEATURE_DEFAULTS = {
  140           null, // NAMESPACES
  141           null, // VALIDATION
  142           Boolean.FALSE, // NOTIFY_BUILTIN_REFS
  143           Boolean.FALSE, // NOTIFY_CHAR_REFS
  144       };
  145   
  146       /** Recognized properties. getRecognizedProperties() hands out a clone of this array. */
  147       private static final String[] RECOGNIZED_PROPERTIES = {
  148           SYMBOL_TABLE,
  149           ERROR_REPORTER,
  150           ENTITY_MANAGER,
  151           ENTITY_RESOLVER,
  152       };
  153   
  154       /** Property defaults, looked up by getPropertyDefault(); no property reports a default. */
  155       private static final Object[] PROPERTY_DEFAULTS = {
  156           null, // SYMBOL_TABLE
  157           null, // ERROR_REPORTER
  158           null, // ENTITY_MANAGER
  159           null, // ENTITY_RESOLVER
  160       };
  161   
  162       // debugging (compile-time switches, all off)
  163   
  164       /** Debug scanner state. */
  165       private static final boolean DEBUG_SCANNER_STATE = false;
  166   
  167       /** Debug dispatcher. */
  168       private static final boolean DEBUG_DISPATCHER = false;
  169   
  170       /** Debug content dispatcher scanning. When true, scanStartElement() prints a trace line to System.out. */
  171       protected static final boolean DEBUG_CONTENT_SCANNING = false;
  172   
  173       //
  174       // Data
  175       //
  176   
  177       // protected data
  178   
  179       /** Document handler. May be null, so it is null-checked before each call. */
  180       protected XMLDocumentHandler fDocumentHandler;
  181   
  182       /** Entity stack: the markup depth recorded at each entity start, indexed by entity depth (see startEntity/endEntity). */
  183       protected int[] fEntityStack = new int[4];
  184   
  185       /** Markup depth. Compared with the fEntityStack entry in endEntity to detect unbalanced markup. */
  186       protected int fMarkupDepth;
  187   
  188       /** Scanner state (presumably one of the SCANNER_STATE_* codes; the setter is not visible here). */
  189       protected int fScannerState;
  190   
  191       /** SubScanner state: inside scanContent method. While set, endEntity flushes pending text in fStringBuffer. */
  192       protected boolean fInScanContent = false;
  193       
  194       /** Has external DTD. Cleared by reset(). */
  195       protected boolean fHasExternalDTD;
  196       
  197       /** Standalone. Set from the declaration's standalone pseudo-attribute; true only for "yes". */
  198       protected boolean fStandalone;
  199       
  200       /** True if [Entity Declared] is a VC; false if it is a WFC. */
  201       protected boolean fIsEntityDeclaredVC;
  202       
  203       /** External subset resolver: the entity-resolver property value when it is an ExternalSubsetResolver, else null. **/
  204       protected ExternalSubsetResolver fExternalSubsetResolver;
  205   
  206       // element information
  207   
  208       /** Current element. Assigned from fElementStack.pushElement in scanStartElement. */
  209       protected QName fCurrentElement;
  210   
  211       /** Element stack. Cleared by reset(). */
  212       protected final ElementStack fElementStack = new ElementStack();
  213   
  214       // other info
  215   
  216       /** Document system identifier (field currently commented out). 
  217        * REVISIT:  So what's this used for?  - NG
  218       * protected String fDocumentSystemId;
  219        ******/
  220   
  221       // features
  222   
  223       /** Notify built-in references. Mirrors the NOTIFY_BUILTIN_REFS feature. */
  224       protected boolean fNotifyBuiltInRefs = false;
  225   
  226       // dispatchers
  227   
  228       /** Active dispatcher: the one scanDocument() drives. */
  229       protected Dispatcher fDispatcher;
  230   
  231       /** Content dispatcher. NOTE(review): initialized via the overridable createContentDispatcher() during construction. */
  232       protected final Dispatcher fContentDispatcher = createContentDispatcher();
  233   
  234       // temporary variables
  235   
  236       /** Element QName: scratch QName filled while scanning a start element. */
  237       protected final QName fElementQName = new QName();
  238   
  239       /** Attribute QName. */
  240       protected final QName fAttributeQName = new QName();
  241   
  242       /** Element attributes: reused for every start element (cleared in scanStartElement). */
  243       protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
  244   
  245       /** String. */
  246       protected final XMLString fTempString = new XMLString();
  247   
  248       /** String. */
  249       protected final XMLString fTempString2 = new XMLString();
  250   
  251       /** Array of 3 strings: read as version, encoding and standalone after the superclass declaration scan. */
  252       private final String[] fStrings = new String[3];
  253   
  254       /** String buffer: used for comment text and for pending character data (see endEntity). */
  255       private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  256   
  257       /** String buffer. */
  258       private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  259   
  260       /** Another QName. */
  261       private final QName fQName = new QName();
  262   
  263       /** Single character array. */
  264       private final char[] fSingleChar = new char[1];
  265       
  266       /** 
  267        * Saw spaces after element name or between attributes.
  268        * 
  269        * This is reserved for the case where scanning of a start element spans
  270        * several methods, as is the case when scanning the start of a root element 
  271        * where a DTD external subset may be read after scanning the element name.
  272        */
  273       private boolean fSawSpace;
  274       
  275       /** Reusable Augmentations. */
  276       private Augmentations fTempAugmentations = null;
  277   
  278       //
  279       // Constructors
  280       //
  281   
  282       /** Default constructor. */
  283       public XMLDocumentFragmentScannerImpl() { super(); } // <init>(): all state comes from the field initializers
  284   
  285       //
  286       // XMLDocumentScanner methods
  287       //
  288   
  289       /** 
  290        * Sets the input source. 
  291        *
  292        * @param inputSource The input source.
  293        *
  294        * @throws IOException Thrown on i/o error.
  295        */
  296       public void setInputSource(XMLInputSource inputSource) throws IOException {
  297           // Become the entity handler, then open the input as the "$fragment$" entity.
  298           fEntityManager.setEntityHandler(this);
  299           fEntityManager.startEntity("$fragment$", inputSource, false, true);
  300       } // setInputSource(XMLInputSource)
  301   
  302       /** 
  303        * Scans a document.
  304        *
  305        * @param complete True if the scanner should scan the document
  306        *                 completely, pushing all events to the registered
  307        *                 document handler. A value of false indicates that
  308        *                 the scanner should only scan the next portion
  309        *                 of the document and return. A scanner instance is
  310        *                 permitted to completely scan a document if it does
  311        *                 not support this "pull" scanning model.
  312        *
  313        * @return True if there is more to scan, false otherwise.
  314        */
  315       public boolean scanDocument(boolean complete) 
  316           throws IOException, XNIException {
  317   
  318           // Pick up the entity manager's current scanner and make sure entity
  319           // callbacks are routed to this component before dispatching.
  320           fEntityScanner = fEntityManager.getEntityScanner();
  321           fEntityManager.setEntityHandler(this);
  322   
  323           // Drive the active dispatcher. When "complete" is false a single
  324           // successful dispatch is enough; otherwise keep going until the
  325           // dispatcher reports that nothing is left to scan.
  326           while (fDispatcher.dispatch(complete)) {
  327               if (!complete) {
  328                   return true;
  329               }
  330           }
  331           return false;
  332       } // scanDocument(boolean):boolean
  333   
  334       //
  335       // XMLComponent methods
  336       //
  337   
  338       /**
  339        * Resets the component. The component can query the component manager
  340        * about any features and properties that affect the operation of the
  341        * component.
  342        * 
  343        * @param componentManager The component manager.
  344        *
  345        * @throws XMLConfigurationException Thrown by component on initialization
  346        *                      error. For example, if a feature or property is
  347        *                      required for the operation of the component, the
  348        *                      component manager may throw this exception when
  349        *                      the feature or property is not recognized or
  350        *                      not supported.
  351        */
  352       public void reset(XMLComponentManager componentManager)
  353           throws XMLConfigurationException {
  354   
  355           // let the base scanner reset itself first
  356           super.reset(componentManager);
  357   
  358           // sax features: pass the namespaces setting on to the attribute list
  359           fAttributes.setNamespaces(fNamespaces);
  360   
  361           // return the per-document state to its initial values
  362           fMarkupDepth = 0;
  363           fCurrentElement = null;
  364           fElementStack.clear();
  365           fHasExternalDTD = false;
  366           fStandalone = false;
  367           fIsEntityDeclaredVC = false;
  368           fInScanContent = false;
  369   
  370           // setup dispatcher: scanning starts in the content state
  371           setScannerState(SCANNER_STATE_CONTENT);
  372           setDispatcher(fContentDispatcher);
  373   
  374           // the remaining settings are only re-read when they have changed
  375           if (!fParserSettings) {
  376               return;
  377           }
  378   
  379           // xerces features: a failed lookup is treated as "off"
  380           try {
  381               fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS);
  382           }
  383           catch (XMLConfigurationException e) {
  384               fNotifyBuiltInRefs = false;
  385           }
  386   
  387           // xerces properties: only an ExternalSubsetResolver is of interest
  388           Object resolver = null;
  389           try {
  390               resolver = componentManager.getProperty(ENTITY_RESOLVER);
  391           }
  392           catch (XMLConfigurationException ignored) {
  393               // a failed lookup leaves resolver null, which clears the field below
  394           }
  395           fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver)
  396               ? (ExternalSubsetResolver) resolver
  397               : null;
  398       } // reset(XMLComponentManager)
  399   
  400       /**
  401        * Returns a list of feature identifiers that are recognized by
  402        * this component. This method may return null if no features
  403        * are recognized by this component.
  404        */
  405       public String[] getRecognizedFeatures() {
  406           return (String[]) RECOGNIZED_FEATURES.clone(); // a copy, so callers cannot alter the shared array
  407       } // getRecognizedFeatures():String[]
  408   
  409       /**
  410        * Sets the state of a feature. This method is called by the component
  411        * manager any time after reset when a feature changes state. 
  412        * <p>
  413        * <strong>Note:</strong> Components should silently ignore features
  414        * that do not affect the operation of the component.
  415        * 
  416        * @param featureId The feature identifier.
  417        * @param state     The state of the feature.
  418        *
  419        * @throws XMLConfigurationException The component should not throw
  420        *                                   this exception; it is the only
  421        *                                   exception type this method
  422        *                                   declares.
  423        */
  424       public void setFeature(String featureId, boolean state)
  425           throws XMLConfigurationException {
  426   
  427           // the base scanner handles the features it tracks itself
  428           super.setFeature(featureId, state);
  429   
  430           // Xerces features: the only one tracked here is notify-builtin-refs.
  431           // NOTIFY_BUILTIN_REFS is the Xerces feature prefix followed by that
  432           // feature's suffix, so comparing the whole identifier is equivalent
  433           // to checking the prefix and an exact-length suffix separately.
  434           if (featureId.equals(NOTIFY_BUILTIN_REFS)) {
  435               fNotifyBuiltInRefs = state;
  436           }
  437   
  438       } // setFeature(String,boolean)
  439   
  440       /**
  441        * Returns a list of property identifiers that are recognized by
  442        * this component. This method may return null if no properties
  443        * are recognized by this component.
  444        */
  445       public String[] getRecognizedProperties() {
  446           return (String[]) RECOGNIZED_PROPERTIES.clone(); // a copy, so callers cannot alter the shared array
  447       } // getRecognizedProperties():String[]
  448   
  449       /**
  450        * Sets the value of a property. This method is called by the component
  451        * manager any time after reset when a property changes value. 
  452        * <p>
  453        * <strong>Note:</strong> Components should silently ignore properties
  454        * that do not affect the operation of the component.
  455        * 
  456        * @param propertyId The property identifier.
  457        * @param value      The value of the property.
  458        *
  459        * @throws XMLConfigurationException The component should not throw
  460        *                                   this exception; it is the only
  461        *                                   exception type this method
  462        *                                   declares.
  463        */
  464       public void setProperty(String propertyId, Object value)
  465           throws XMLConfigurationException {
  466   
  467           super.setProperty(propertyId, value);
  468   
  469           // Xerces properties: anything without the Xerces prefix is ignored here
  470           if (!propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
  471               return;
  472           }
  473           final String suffix = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
  474   
  475           if (Constants.ENTITY_MANAGER_PROPERTY.equals(suffix)) {
  476               fEntityManager = (XMLEntityManager) value;
  477           }
  478           else if (Constants.ENTITY_RESOLVER_PROPERTY.equals(suffix)) {
  479               // a value that is not an ExternalSubsetResolver clears the field
  480               fExternalSubsetResolver = (value instanceof ExternalSubsetResolver)
  481                   ? (ExternalSubsetResolver) value
  482                   : null;
  483           }
  484   
  485       } // setProperty(String,Object)
  486   
  487       /** 
  488        * Returns the default state for a feature, or null if this
  489        * component does not want to report a default value for this
  490        * feature.
  491        *
  492        * @param featureId The feature identifier.
  493        *
  494        * @since Xerces 2.2.0
  495        */
  496       public Boolean getFeatureDefault(String featureId) {
  497           // find the first recognized feature with this identifier; its default sits at the same index
  498           int index = 0;
  499           while (index < RECOGNIZED_FEATURES.length && !RECOGNIZED_FEATURES[index].equals(featureId)) {
  500               index++;
  501           }
  502           return (index < RECOGNIZED_FEATURES.length) ? FEATURE_DEFAULTS[index] : null;
  503       } // getFeatureDefault(String):Boolean
  504   
  505       /** 
  506        * Returns the default state for a property, or null if this
  507        * component does not want to report a default value for this
  508        * property. 
  509        *
  510        * @param propertyId The property identifier.
  511        *
  512        * @since Xerces 2.2.0
  513        */
  514       public Object getPropertyDefault(String propertyId) {
  515           // find the first recognized property with this identifier; its default sits at the same index
  516           int index = 0;
  517           while (index < RECOGNIZED_PROPERTIES.length && !RECOGNIZED_PROPERTIES[index].equals(propertyId)) {
  518               index++;
  519           }
  520           return (index < RECOGNIZED_PROPERTIES.length) ? PROPERTY_DEFAULTS[index] : null;
  521       } // getPropertyDefault(String):Object
  522   
  523       //
  524       // XMLDocumentSource methods
  525       //
  526   
  527       /**
  528        * setDocumentHandler
  529        * 
  530        * @param documentHandler 
  531        */
  532       public void setDocumentHandler(XMLDocumentHandler documentHandler) {
  533           this.fDocumentHandler = documentHandler; // stored as given; the scanning code null-checks it before use
  534       } // setDocumentHandler(XMLDocumentHandler)
  535   
  536   
  537       /** Returns the document handler */
  538       public XMLDocumentHandler getDocumentHandler() {
  539           return this.fDocumentHandler;
  540       } // getDocumentHandler():XMLDocumentHandler
  541   
  542       //
  543       // XMLEntityHandler methods
  544       //
  545   
  546       /**
  547        * This method notifies of the start of an entity. The DTD has the
  548        * pseudo-name of "[dtd]"; parameter entity names start with '%'; and
  549        * general entities are just specified by their name.
  550        * 
  551        * @param name     The name of the entity.
  552        * @param identifier The resource identifier.
  553        * @param encoding The auto-detected IANA encoding name of the entity
  554        *                 stream. This value will be null in those situations
  555        *                 where the entity encoding is not auto-detected (e.g.
  556        *                 internal entities or a document entity that is
  557        *                 parsed from a java.io.Reader).
  558        * @param augs     Additional information that may include infoset augmentations
  559        *
  560        * @throws XNIException Thrown by handler to signal an error.
  561        */
  562       public void startEntity(String name, 
  563                               XMLResourceIdentifier identifier,
  564                               String encoding, Augmentations augs) throws XNIException {
  565   
  566           // keep track of this entity before fEntityDepth is increased
  567           if (fEntityDepth == fEntityStack.length) {
  568               int[] entityarray = new int[fEntityStack.length * 2];
  569               System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length);
  570               fEntityStack = entityarray;
  571           }
  572           fEntityStack[fEntityDepth] = fMarkupDepth;
  573   
  574           super.startEntity(name, identifier, encoding, augs);
  575   
  576           // WFC:  entity declared in external subset in standalone doc
  577           if(fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) {
  578               reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
  579                   new Object[]{name});
  580           }
  581   
  582           // call handler
  583           if (fDocumentHandler != null && !fScanningAttribute) {
  584               if (!name.equals("[xml]")) {
  585                   fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
  586               }
  587           }
  588   
  589       } // startEntity(String,XMLResourceIdentifier,String)
  590   
  591       /**
  592        * This method notifies of the end of an entity. The DTD has the pseudo-name
  593        * of "[dtd]"; parameter entity names start with '%'; and general entities 
  594        * are just specified by their name.
  595        * 
  596        * @param name The name of the entity.
  597        * @param augs Additional information that may include infoset augmentations
  598        *
  599        * @throws XNIException Thrown by handler to signal an error.
  600        */
  601       public void endEntity(String name, Augmentations augs) throws XNIException {
  602   
  603           // flush any character data still pending in the output buffer
  604           // (see scanContent) and mark the buffer as flushed
  605           final boolean pendingText = fInScanContent && fStringBuffer.length != 0;
  606           if (pendingText && fDocumentHandler != null) {
  607               fDocumentHandler.characters(fStringBuffer, null);
  608               fStringBuffer.length = 0;
  609           }
  610   
  611           super.endEntity(name, augs);
  612   
  613           // the markup depth must match the depth recorded for this entity
  614           final int recordedDepth = fEntityStack[fEntityDepth];
  615           if (fMarkupDepth != recordedDepth) {
  616               reportFatalError("MarkupEntityMismatch", null);
  617           }
  618   
  619           // call handler, but not while an attribute is being scanned and not for "[xml]"
  620           if (fDocumentHandler != null && !fScanningAttribute && !name.equals("[xml]")) {
  621               fDocumentHandler.endGeneralEntity(name, augs);
  622           }
  623   
  624       } // endEntity(String,Augmentations)
  625   
  626       //
  627       // Protected methods
  628       //
  629   
  630       // dispatcher factory methods
  631   
  632       /** Creates a content dispatcher. */
  633       protected Dispatcher createContentDispatcher() {
  634           return new FragmentContentDispatcher(); // protected factory: a subclass can supply its own dispatcher
  635       } // createContentDispatcher():Dispatcher
  636   
  637       // scanning methods
  638   
  639       /**
  640        * Scans an XML or text declaration.
  641        * <p>
  642        * <pre>
  643        * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  644        * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  645        * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
  646        * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  647        * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  648        *                 | ('"' ('yes' | 'no') '"'))
  649        *
  650        * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
  651        * </pre>
  652        *
  653        * @param scanningTextDecl True if a text declaration is to
  654        *                         be scanned instead of an XML
  655        *                         declaration.
  656        */
  657       protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
  658           throws IOException, XNIException {
  659   
  660           // scan decl
  661           super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
  662           fMarkupDepth--;
  663   
  664           // pseudo-attribute values
  665           String version = fStrings[0];
  666           String encoding = fStrings[1];
  667           String standalone = fStrings[2];
  668   
  669           // set standalone
  670           fStandalone = standalone != null && standalone.equals("yes");
  671           fEntityManager.setStandalone(fStandalone);
  672           
  673           // set version on reader
  674           fEntityScanner.setXMLVersion(version);
  675   
  676           // call handler
  677           if (fDocumentHandler != null) {
  678               if (scanningTextDecl) {
  679                   fDocumentHandler.textDecl(version, encoding, null);
  680               }
  681               else {
  682                   fDocumentHandler.xmlDecl(version, encoding, standalone, null);
  683               }
  684           }
  685   
  686           // set encoding on reader
  687           if (encoding != null && !fEntityScanner.fCurrentEntity.isEncodingExternallySpecified()) {
  688               fEntityScanner.setEncoding(encoding);
  689           }
  690   
  691       } // scanXMLDeclOrTextDecl(boolean)
  692   
  693       /**
  694        * Scans processing instruction data. This is needed to handle the situation
  695        * where a document starts with a processing instruction whose 
  696        * target name <em>starts with</em> "xml". (e.g. xmlfoo)
  697        *
  698        * @param target The PI target
  699        * @param data The string to fill in with the data
  700        */
  701       protected void scanPIData(String target, XMLString data) 
  702           throws IOException, XNIException {
  703           // the superclass fills 'data'; afterwards the markup depth is decremented
  704           super.scanPIData(target, data);
  705           fMarkupDepth--;
  706   
  707           // call handler, if one is registered
  708           if (fDocumentHandler == null) {
  709               return;
  710           }
  711           fDocumentHandler.processingInstruction(target, data, null);
  712       } // scanPIData(String,XMLString)
  713   
  714       /**
  715        * Scans a comment.
  716        * <p>
  717        * <pre>
  718        * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  719        * </pre>
  720        * <p>
  721        * <strong>Note:</strong> Called after scanning past '&lt;!--'
  722        */
  723       protected void scanComment() throws IOException, XNIException {
  724           // the comment text is scanned into fStringBuffer, then the markup depth is decremented
  725           scanComment(fStringBuffer);
  726           fMarkupDepth--;
  727   
  728           // call handler, if one is registered
  729           if (fDocumentHandler == null) {
  730               return;
  731           }
  732           fDocumentHandler.comment(fStringBuffer, null);
  733       } // scanComment()
  734       
  735       /** 
  736        * Scans a start element. This method will handle the binding of
  737        * namespace information and notifying the handler of the start
  738        * of the element.
  739        * <p>
  740        * <pre>
  741        * [44] EmptyElemTag ::= '&lt;' Name (S Attribute)* S? '/>'
  742        * [40] STag ::= '&lt;' Name (S Attribute)* S? '>'
  743        * </pre> 
  744        * <p>
  745        * <strong>Note:</strong> This method assumes that the leading
  746        * '&lt;' character has been consumed.
  747        * <p>
  748        * <strong>Note:</strong> This method uses the fElementQName and
  749        * fAttributes variables. The contents of these variables will be
  750        * destroyed. The caller should copy important information out of
  751        * these variables before calling this method.
  752        *
  753        * @return True if element is empty (i.e. it matches
  754        *          production [44]).
  755        */
  756       protected boolean scanStartElement() 
  757           throws IOException, XNIException {
  758           if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()");
  759   
  760           // name
  761           if (fNamespaces) {
  762               fEntityScanner.scanQName(fElementQName);
  763           }
  764           else {
  765               String name = fEntityScanner.scanName();
  766               fElementQName.setValues(null, name, name, null);
  767           }
  768           String rawname = fElementQName.rawname;
  769   
  770           // push element stack
  771           fCurrentElement = fElementStack.pushElement(fElementQName);
  772   
  773           // attributes
  774           boolean empty = false;
  775           fAttributes.removeAllAttributes();
  776           do {
  777               // spaces
  778               boolean sawSpace = fEntityScanner.skipSpaces();
  779   
  780               // end tag?
  781               int c = fEntityScanner.peekChar();
  782               if (c == '>') {
  783                   fEntityScanner.scanChar();
  784                   break;
  785               }
  786               else if (c == '/') {
  787                   fEntityScanner.scanChar();
  788                   if (!fEntityScanner.skipChar('>')) {
  789                       reportFatalError("ElementUnterminated",
  790                                        new Object[]{rawname});
  791                   }
  792                   empty = true;
  793                   break;
  794               }
  795               else if (!isValidNameStartChar(c) || !sawSpace) {
  796                   // Second chance. Check if this character is a high
  797                   // surrogate of a valid name start character.
  798                   if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
  799                       reportFatalError("ElementUnterminated",
  800                                        new Object[] { rawname });
  801                   }
  802               }
  803   
  804               // attributes
  805               scanAttribute(fAttributes);
  806   
  807           } while (true);
  808   
  809           // call handler
  810           if (fDocumentHandler != null) {
  811               if (empty) {
  812   
  813                   //decrease the markup depth..
  814                   fMarkupDepth--;
  815                   // check that this element was opened in the same entity
  816                   if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  817                       reportFatalError("ElementEntityMismatch",
  818                                        new Object[]{fCurrentElement.rawname});
  819                   }
  820   
  821                   fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
  822   
  823                   //pop the element off the stack..
  824                   fElementStack.popElement(fElementQName);
  825               }
  826               else {
  827                   fDocumentHandler.startElement(fElementQName, fAttributes, null);
  828               }
  829           }
  830   
  831           if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty);
  832           return empty;
  833   
  834       } // scanStartElement():boolean
  835       
  836       /**
  837        * Scans the name of an element in a start or empty tag. 
  838        * 
  839        * @see #scanStartElement()
  840        */
  841       protected void scanStartElementName ()
  842           throws IOException, XNIException {
  843           // name
  844           if (fNamespaces) {
  845               fEntityScanner.scanQName(fElementQName);
  846           }
  847           else {
  848               String name = fEntityScanner.scanName();
  849               fElementQName.setValues(null, name, name, null);
  850           }
  851           // Must skip spaces here because the DTD scanner
  852           // would consume them at the end of the external subset.
  853           fSawSpace = fEntityScanner.skipSpaces();
  854       } // scanStartElementName()
  855   
  856       /**
  857        * Scans the remainder of a start or empty tag after the element name.
  858        * 
  859        * @see #scanStartElement
  860        * @return True if element is empty.
  861        */
  862       protected boolean scanStartElementAfterName()
  863           throws IOException, XNIException {
  864           String rawname = fElementQName.rawname;
  865   
  866           // push element stack
  867           fCurrentElement = fElementStack.pushElement(fElementQName);
  868   
  869           // attributes
  870           boolean empty = false;
  871           fAttributes.removeAllAttributes();
  872           do {
  873           	
  874               // end tag?
  875               int c = fEntityScanner.peekChar();
  876               if (c == '>') {
  877                   fEntityScanner.scanChar();
  878                   break;
  879               }
  880               else if (c == '/') {
  881                   fEntityScanner.scanChar();
  882                   if (!fEntityScanner.skipChar('>')) {
  883                       reportFatalError("ElementUnterminated",
  884                                        new Object[]{rawname});
  885                   }
  886                   empty = true;
  887                   break;
  888               }
  889               else if (!isValidNameStartChar(c) || !fSawSpace) {
  890                   // Second chance. Check if this character is a high
  891                   // surrogate of a valid name start character.
  892                   if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
  893                       reportFatalError("ElementUnterminated",
  894                                        new Object[] { rawname });
  895                   }
  896               }
  897   
  898               // attributes
  899               scanAttribute(fAttributes);
  900               
  901               // spaces
  902               fSawSpace = fEntityScanner.skipSpaces();
  903   
  904           } while (true);
  905   
  906           // call handler
  907           if (fDocumentHandler != null) {
  908               if (empty) {
  909   
  910                   //decrease the markup depth..
  911                   fMarkupDepth--;
  912                   // check that this element was opened in the same entity
  913                   if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
  914                       reportFatalError("ElementEntityMismatch",
  915                                        new Object[]{fCurrentElement.rawname});
  916                   }
  917   
  918                   fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
  919   
  920                   //pop the element off the stack..
  921                   fElementStack.popElement(fElementQName);
  922               }
  923               else {
  924                   fDocumentHandler.startElement(fElementQName, fAttributes, null);
  925               }
  926           }
  927   
  928           if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty);
  929           return empty;
  930       } // scanStartElementAfterName()
  931   
  932       /** 
  933        * Scans an attribute.
  934        * <p>
  935        * <pre>
  936        * [41] Attribute ::= Name Eq AttValue
  937        * </pre> 
  938        * <p>
  939        * <strong>Note:</strong> This method assumes that the next 
  940        * character on the stream is the first character of the attribute
  941        * name.
  942        * <p>
  943        * <strong>Note:</strong> This method uses the fAttributeQName and
  944        * fQName variables. The contents of these variables will be
  945        * destroyed.
  946        *
  947        * @param attributes The attributes list for the scanned attribute.
  948        */
  949       protected void scanAttribute(XMLAttributes attributes) 
  950           throws IOException, XNIException {
  951           if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()");
  952   
  953           // name
  954           if (fNamespaces) {
  955               fEntityScanner.scanQName(fAttributeQName);
  956           }
  957           else {
  958               String name = fEntityScanner.scanName();
  959               fAttributeQName.setValues(null, name, name, null);
  960           }
  961   
  962           // equals
  963           fEntityScanner.skipSpaces();
  964           if (!fEntityScanner.skipChar('=')) {
  965               reportFatalError("EqRequiredInAttribute",
  966                                new Object[]{fCurrentElement.rawname,fAttributeQName.rawname});
  967           }
  968           fEntityScanner.skipSpaces();
  969   
  970           // content
  971           int oldLen = attributes.getLength();
  972           int attrIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
  973   
  974           // WFC: Unique Att Spec
  975           if (oldLen == attributes.getLength()) {
  976               reportFatalError("AttributeNotUnique",
  977                                new Object[]{fCurrentElement.rawname,
  978                                             fAttributeQName.rawname});
  979           }      
  980           
  981           // Scan attribute value and return true if the un-normalized and normalized value are the same
  982           boolean isSameNormalizedAttr =  scanAttributeValue(fTempString, fTempString2,
  983                   fAttributeQName.rawname, fIsEntityDeclaredVC, fCurrentElement.rawname);
  984           
  985           attributes.setValue(attrIndex, fTempString.toString());
  986           // If the non-normalized and normalized value are the same, avoid creating a new string.
  987           if (!isSameNormalizedAttr) {
  988               attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  989           }
  990           attributes.setSpecified(attrIndex, true);
  991   
  992           if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
  993       } // scanAttribute(XMLAttributes)
  994   
  995       /**
  996        * Scans element content.
  997        *
  998        * @return Returns the next character on the stream.
  999        */
 1000       protected int scanContent() throws IOException, XNIException {
 1001   
 1002           XMLString content = fTempString;
 1003           int c = fEntityScanner.scanContent(content);
 1004           if (c == '\r') {
 1005               // happens when there is the character reference &#13;
 1006               fEntityScanner.scanChar();
 1007               fStringBuffer.clear();
 1008               fStringBuffer.append(fTempString);
 1009               fStringBuffer.append((char)c);
 1010               content = fStringBuffer;
 1011               c = -1;
 1012           }
 1013           if (fDocumentHandler != null && content.length > 0) {
 1014               fDocumentHandler.characters(content, null);
 1015           }
 1016   
 1017           if (c == ']' && fTempString.length == 0) {
 1018               fStringBuffer.clear();
 1019               fStringBuffer.append((char)fEntityScanner.scanChar());
 1020               // remember where we are in case we get an endEntity before we
 1021               // could flush the buffer out - this happens when we're parsing an
 1022               // entity which ends with a ]
 1023               fInScanContent = true;
 1024               //
 1025               // We work on a single character basis to handle cases such as:
 1026               // ']]]>' which we might otherwise miss.
 1027               //
 1028               if (fEntityScanner.skipChar(']')) {
 1029                   fStringBuffer.append(']');
 1030                   while (fEntityScanner.skipChar(']')) {
 1031                       fStringBuffer.append(']');
 1032                   }
 1033                   if (fEntityScanner.skipChar('>')) {
 1034                       reportFatalError("CDEndInContent", null);
 1035                   }
 1036               }
 1037               if (fDocumentHandler != null && fStringBuffer.length != 0) {
 1038                   fDocumentHandler.characters(fStringBuffer, null);
 1039               }
 1040               fInScanContent = false;
 1041               c = -1;
 1042           }
 1043           return c;
 1044   
 1045       } // scanContent():int
 1046   
 1047   
 1048       /** 
 1049        * Scans a CDATA section. 
 1050        * <p>
 1051        * <strong>Note:</strong> This method uses the fTempString and
 1052        * fStringBuffer variables.
 1053        *
 1054        * @param complete True if the CDATA section is to be scanned
 1055        *                 completely.
 1056        *
 1057        * @return True if CDATA is completely scanned.
 1058        */
 1059       protected boolean scanCDATASection(boolean complete) 
 1060           throws IOException, XNIException {
 1061           
 1062           // call handler
 1063           if (fDocumentHandler != null) {
 1064               fDocumentHandler.startCDATA(null);
 1065           }
 1066   
 1067           while (true) {
 1068               fStringBuffer.clear();
 1069               if (!fEntityScanner.scanData("]]", fStringBuffer)) {
 1070                   if (fDocumentHandler != null && fStringBuffer.length > 0) {
 1071                       fDocumentHandler.characters(fStringBuffer, null);
 1072                   }
 1073                   int brackets = 0;
 1074                   while (fEntityScanner.skipChar(']')) {
 1075                       brackets++;
 1076                   }
 1077                   if (fDocumentHandler != null && brackets > 0) {
 1078                       fStringBuffer.clear();
 1079                       if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
 1080                           // Handle large sequences of ']'
 1081                           int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE;
 1082                           int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE;
 1083                           for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
 1084                               fStringBuffer.append(']');
 1085                           }
 1086                           for (int i = 0; i < chunks; i++) {
 1087                               fDocumentHandler.characters(fStringBuffer, null);
 1088                           }
 1089                           if (remainder != 0) {
 1090                               fStringBuffer.length = remainder;
 1091                               fDocumentHandler.characters(fStringBuffer, null);
 1092                           }
 1093                       }
 1094                       else {
 1095                       	for (int i = 0; i < brackets; i++) {
 1096                       	    fStringBuffer.append(']');
 1097                       	}
 1098                          fDocumentHandler.characters(fStringBuffer, null);
 1099                       }
 1100                   }
 1101                   if (fEntityScanner.skipChar('>')) {
 1102                       break;
 1103                   }
 1104                   if (fDocumentHandler != null) {
 1105                       fStringBuffer.clear();
 1106                       fStringBuffer.append("]]");
 1107                       fDocumentHandler.characters(fStringBuffer, null);
 1108                   }
 1109               }
 1110               else {
 1111                   if (fDocumentHandler != null) {
 1112                       fDocumentHandler.characters(fStringBuffer, null);
 1113                   }
 1114                   int c = fEntityScanner.peekChar();
 1115                   if (c != -1 && isInvalidLiteral(c)) {
 1116                       if (XMLChar.isHighSurrogate(c)) {
 1117                           fStringBuffer.clear();
 1118                           scanSurrogates(fStringBuffer);
 1119                           if (fDocumentHandler != null) {
 1120                               fDocumentHandler.characters(fStringBuffer, null);
 1121                           }
 1122                       }
 1123                       else {
 1124                           reportFatalError("InvalidCharInCDSect",
 1125                                           new Object[]{Integer.toString(c,16)});
 1126                           fEntityScanner.scanChar();
 1127                       }
 1128                   }
 1129               }
 1130           }
 1131           fMarkupDepth--;
 1132   
 1133           // call handler
 1134           if (fDocumentHandler != null) {
 1135               fDocumentHandler.endCDATA(null);
 1136           }
 1137   
 1138           return true;
 1139   
 1140       } // scanCDATASection(boolean):boolean
 1141   
 1142       /**
 1143        * Scans an end element.
 1144        * <p>
 1145        * <pre>
 1146        * [42] ETag ::= '&lt;/' Name S? '>'
 1147        * </pre>
 1148        * <p>
 1149        * <strong>Note:</strong> This method uses the fElementQName variable.
 1150        * The contents of this variable will be destroyed. The caller should
 1151        * copy the needed information out of this variable before calling
 1152        * this method.
 1153        *
 1154        * @return The element depth.
 1155        */
 1156       protected int scanEndElement() throws IOException, XNIException {
 1157           if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()");
 1158   
 1159           fElementStack.popElement(fElementQName) ;
 1160   
 1161           // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
 1162           //In scanners most of the time is consumed on checks done for XML characters, we can
 1163           // optimize on it and avoid the checks done for endElement,
 1164           //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
 1165   
 1166           // this should work both for namespace processing true or false...
 1167   
 1168           //REVISIT: if the string is not the same as expected.. we need to do better error handling..
 1169           //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
 1170           if (!fEntityScanner.skipString(fElementQName.rawname)) {
 1171               reportFatalError("ETagRequired", new Object[]{fElementQName.rawname});
 1172           }
 1173   
 1174           // end
 1175           fEntityScanner.skipSpaces();
 1176           if (!fEntityScanner.skipChar('>')) {
 1177               reportFatalError("ETagUnterminated",
 1178                                new Object[]{fElementQName.rawname});
 1179           }
 1180           fMarkupDepth--;
 1181   
 1182           //we have increased the depth for two markup "<" characters
 1183           fMarkupDepth--;
 1184         
 1185           // check that this element was opened in the same entity
 1186           if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
 1187               reportFatalError("ElementEntityMismatch",
 1188                                new Object[]{fCurrentElement.rawname});
 1189           }
 1190   
 1191           // call handler
 1192           if (fDocumentHandler != null ) {
 1193               fDocumentHandler.endElement(fElementQName, null);
 1194           }
 1195   
 1196           return fMarkupDepth;
 1197    
 1198       } // scanEndElement():int
 1199   
 1200       /**
 1201        * Scans a character reference.
 1202        * <p>
 1203        * <pre>
 1204        * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 1205        * </pre>
 1206        */
 1207       protected void scanCharReference() 
 1208           throws IOException, XNIException {
 1209   
 1210           fStringBuffer2.clear();
 1211           int ch = scanCharReferenceValue(fStringBuffer2, null);
 1212           fMarkupDepth--;
 1213           if (ch != -1) {
 1214               // call handler
 1215               if (fDocumentHandler != null) {
 1216                   if (fNotifyCharRefs) {
 1217                       fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
 1218                   }
 1219                   Augmentations augs = null;
 1220                   if (fValidation && ch <= 0x20) {
 1221                       if (fTempAugmentations != null) {
 1222                           fTempAugmentations.removeAllItems();
 1223                       }
 1224                       else {
 1225                           fTempAugmentations = new AugmentationsImpl();
 1226                       }
 1227                       augs = fTempAugmentations;
 1228                       augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
 1229                   }
 1230                   fDocumentHandler.characters(fStringBuffer2, augs);
 1231                   if (fNotifyCharRefs) {
 1232                       fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
 1233                   }
 1234               }
 1235           }
 1236   
 1237       } // scanCharReference()
 1238   
 1239       /**
 1240        * Scans an entity reference.
 1241        *
 1242        * @throws IOException  Thrown if i/o error occurs.
 1243        * @throws XNIException Thrown if handler throws exception upon
 1244        *                      notification.
 1245        */
 1246       protected void scanEntityReference() throws IOException, XNIException {
 1247   
 1248           // name
 1249           String name = fEntityScanner.scanName();
 1250           if (name == null) {
 1251               reportFatalError("NameRequiredInReference", null);
 1252               return;
 1253           }
 1254   
 1255           // end
 1256           if (!fEntityScanner.skipChar(';')) {
 1257               reportFatalError("SemicolonRequiredInReference", new Object []{name});
 1258           }
 1259           fMarkupDepth--;
 1260   
 1261           // handle built-in entities
 1262           if (name == fAmpSymbol) {
 1263               handleCharacter('&', fAmpSymbol);
 1264           }
 1265           else if (name == fLtSymbol) {
 1266               handleCharacter('<', fLtSymbol);
 1267           }
 1268           else if (name == fGtSymbol) {
 1269               handleCharacter('>', fGtSymbol);
 1270           }
 1271           else if (name == fQuotSymbol) {
 1272               handleCharacter('"', fQuotSymbol);
 1273           }
 1274           else if (name == fAposSymbol) {
 1275               handleCharacter('\'', fAposSymbol);
 1276           }
 1277           // start general entity
 1278           else if (fEntityManager.isUnparsedEntity(name)) {
 1279               reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
 1280           }
 1281           else {
 1282               if (!fEntityManager.isDeclaredEntity(name)) {
 1283                   if (fIsEntityDeclaredVC) {
 1284                       if (fValidation)
 1285                           fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 
 1286                                                       new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
 1287                   }
 1288                   else {
 1289                       reportFatalError("EntityNotDeclared", new Object[]{name});
 1290                   }
 1291               }
 1292               fEntityManager.startEntity(name, false);
 1293           }
 1294   
 1295       } // scanEntityReference()
 1296   
 1297       // utility methods
 1298   
 1299       /** 
 1300        * Calls document handler with a single character resulting from
 1301        * built-in entity resolution. 
 1302        *
 1303        * @param c
 1304        * @param entity built-in name
 1305        */
 1306       private void handleCharacter(char c, String entity) throws XNIException {
 1307           if (fDocumentHandler != null) {
 1308               if (fNotifyBuiltInRefs) {
 1309                   fDocumentHandler.startGeneralEntity(entity, null, null, null);
 1310               }
 1311               
 1312               fSingleChar[0] = c;
 1313               fTempString.setValues(fSingleChar, 0, 1);
 1314               fDocumentHandler.characters(fTempString, null);
 1315               
 1316               if (fNotifyBuiltInRefs) {
 1317                   fDocumentHandler.endGeneralEntity(entity, null);
 1318               }
 1319           }
 1320       } // handleCharacter(char)
 1321   
 1322       /** 
 1323        * Handles the end element. This method will make sure that
 1324        * the end element name matches the current element and notify
 1325        * the handler about the end of the element and the end of any
 1326        * relevent prefix mappings.
 1327        * <p>
 1328        * <strong>Note:</strong> This method uses the fQName variable.
 1329        * The contents of this variable will be destroyed.
 1330        *
 1331        * @param element The element.
 1332        *
 1333        * @return The element depth.
 1334        *
 1335        * @throws XNIException Thrown if the handler throws a SAX exception
 1336        *                      upon notification.
 1337        *
 1338        */
 1339       // REVISIT: need to remove this method. It's not called anymore, because
 1340       // the handling is done when the end tag is scanned. - SG
 1341       protected int handleEndElement(QName element, boolean isEmpty) 
 1342           throws XNIException {
 1343   
 1344           fMarkupDepth--;
 1345           // check that this element was opened in the same entity
 1346           if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
 1347               reportFatalError("ElementEntityMismatch",
 1348                                new Object[]{fCurrentElement.rawname});
 1349           }
 1350           // make sure the elements match
 1351           QName startElement = fQName;
 1352           fElementStack.popElement(startElement);
 1353           if (element.rawname != startElement.rawname) {
 1354               reportFatalError("ETagRequired",
 1355                                new Object[]{startElement.rawname});
 1356           }
 1357   
 1358           // bind namespaces
 1359           if (fNamespaces) {
 1360               element.uri = startElement.uri;
 1361           }
 1362           
 1363           // call handler
 1364           if (fDocumentHandler != null && !isEmpty) {
 1365               fDocumentHandler.endElement(element, null);
 1366           }
 1367   
 1368           return fMarkupDepth;
 1369   
 1370       } // callEndElement(QName,boolean):int
 1371   
 1372       // helper methods
 1373   
 1374       /**
 1375        * Sets the scanner state.
 1376        *
 1377        * @param state The new scanner state.
 1378        */
 1379       protected final void setScannerState(int state) {
 1380   
 1381           fScannerState = state;
 1382           if (DEBUG_SCANNER_STATE) {
 1383               System.out.print("### setScannerState: ");
 1384               System.out.print(getScannerStateName(state));
 1385               System.out.println();
 1386           }
 1387   
 1388       } // setScannerState(int)
 1389   
 1390       /**
 1391        * Sets the dispatcher.
 1392        *
 1393        * @param dispatcher The new dispatcher.
 1394        */
 1395       protected final void setDispatcher(Dispatcher dispatcher) {
 1396           fDispatcher = dispatcher;
 1397           if (DEBUG_DISPATCHER) {
 1398               System.out.print("%%% setDispatcher: ");
 1399               System.out.print(getDispatcherName(dispatcher));
 1400               System.out.println();
 1401           }
 1402       }
 1403   
 1404       //
 1405       // Private methods
 1406       //
 1407   
 1408       /** Returns the scanner state name. */
 1409       protected String getScannerStateName(int state) {
 1410   
 1411           switch (state) {
 1412               case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE";
 1413               case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT";
 1414               case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP";
 1415               case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT";
 1416               case SCANNER_STATE_PI: return "SCANNER_STATE_PI";
 1417               case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT";
 1418               case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE";
 1419               case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT";
 1420               case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED";
 1421               case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA";
 1422               case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL";
 1423           }
 1424   
 1425           return "??? ("+state+')';
 1426   
 1427       } // getScannerStateName(int):String
 1428   
 1429       /** Returns the dispatcher name. */
 1430       public String getDispatcherName(Dispatcher dispatcher) {
 1431   
 1432           if (DEBUG_DISPATCHER) {
 1433               if (dispatcher != null) {
 1434                   String name = dispatcher.getClass().getName();
 1435                   int index = name.lastIndexOf('.');
 1436                   if (index != -1) {
 1437                       name = name.substring(index + 1);
 1438                       index = name.lastIndexOf('$');
 1439                       if (index != -1) {
 1440                           name = name.substring(index + 1);
 1441                       }
 1442                   }
 1443                   return name;
 1444               }
 1445           }
 1446           return "null";
 1447   
 1448       } // getDispatcherName():String
 1449   
 1450       //
 1451       // Classes
 1452       //
 1453   
 1454       /**
 1455        * Element stack. This stack operates without synchronization, error
 1456        * checking, and it re-uses objects instead of throwing popped items
 1457        * away.
 1458        *
 1459        * @author Andy Clark, IBM
 1460        */
 1461       protected static class ElementStack {
 1462   
 1463           //
 1464           // Data
 1465           //
 1466   
 1467           /**
                 * The stack data. Every slot holds a QName instance that is
                 * created up front (by the constructor, or by pushElement when
                 * the array grows), so a push reuses an existing object
                 * instead of allocating a new one.
                 */
 1468           protected QName[] fElements;
 1469   
 1470           /**
                 * The size of the stack, i.e. the number of entries currently
                 * pushed. It is also the index of the slot that the next call
                 * to pushElement will fill.
                 */
 1471           protected int fSize;
 1472   
 1473           //
 1474           // Constructors
 1475           //
 1476   
 1477           /**
                 * Default constructor. Creates an empty stack whose backing
                 * array has ten slots, each pre-filled with a new QName.
                 */
 1478           public ElementStack() {
 1479               fElements = new QName[10];
 1480               for (int i = 0; i < fElements.length; i++) {
 1481                   fElements[i] = new QName();
 1482               }
 1483           } // <init>()
 1484   
 1485           //
 1486           // Public methods
 1487           //
 1488   
 1489           /** 
 1490            * Pushes an element on the stack. 
 1491            * <p>
 1492            * <strong>Note:</strong> The QName values are copied into the
 1493            * stack. In other words, the caller does <em>not</em> orphan
 1494            * the element to the stack. Also, the QName object returned
 1495            * is <em>not</em> orphaned to the caller. It should be 
 1496            * considered read-only.
 1497            *
 1498            * @param element The element to push onto the stack.
 1499            *
 1500            * @return Returns the actual QName object that stores the
                 *         pushed values inside the stack (the slot that was
                 *         just filled).
 1501            */
 1502           public QName pushElement(QName element) {
                    // Stack is full: double the capacity, carry over the QName
                    // objects already held, and fill the new upper half with
                    // fresh QName instances.
 1503               if (fSize == fElements.length) {
 1504                   QName[] array = new QName[fElements.length * 2];
 1505                   System.arraycopy(fElements, 0, array, 0, fSize);
 1506                   fElements = array;
 1507                   for (int i = fSize; i < fElements.length; i++) {
 1508                       fElements[i] = new QName();
 1509                   }
 1510               }
                    // Write the values into the next free slot, then hand that
                    // same slot object back and advance the size.
 1511               fElements[fSize].setValues(element);
 1512               return fElements[fSize++];
 1513           } // pushElement(QName):QName
 1514   
 1515           /** 
 1516            * Pops an element off of the stack by setting the values of
 1517            * the specified QName.
 1518            * <p>
 1519            * <strong>Note:</strong> Nothing is returned. The stack keeps
 1520            * its own QName object for the popped slot and only passes it
 1521            * to <code>element.setValues(...)</code>.
                 * <p>
                 * <strong>Note:</strong> There is no underflow check. If the
                 * stack is empty, fSize is decremented to -1 before the array
                 * access fails with an ArrayIndexOutOfBoundsException, and it
                 * stays at -1 afterwards.
                 *
                 * @param element The QName that receives the values of the
                 *                popped entry.
 1522            */
 1523           public void popElement(QName element) {
 1524               element.setValues(fElements[--fSize]);
 1525           } // popElement(QName)
 1526   
 1527           /**
                 * Clears the stack without throwing away existing QName objects.
                 * Only the size is reset; the backing array and the QName
                 * instances in it are kept for later pushes.
                 */
 1528           public void clear() {
 1529               fSize = 0;
 1530           } // clear()
 1531   
 1532       } // class ElementStack
 1533   
 1534       /** 
 1535        * This interface defines an XML "event" dispatching model. Classes
 1536        * that implement this interface are responsible for scanning parts
 1537        * of the XML document and dispatching callbacks.
 1538        * 
 1539        * @xerces.internal
 1540        *
 1541        * @author Glenn Marcy, IBM
 1542        */
 1543       protected interface Dispatcher {
 1544   
 1545           //
 1546           // Dispatcher methods
 1547           //
 1548   
 1549           /** 
 1550            * Dispatch an XML "event".
 1551            *
 1552            * @param complete True if this dispatcher is intended to scan
 1553            *                 and dispatch as much as possible.
 1554            *
 1555            * @return True if there is more to dispatch either from this
 1556            *          or another dispatcher.
 1557            *
 1558            * @throws IOException  Thrown on i/o error.
 1559            * @throws XNIException Thrown on parse error.
 1560            */
 1561           public boolean dispatch(boolean complete) 
 1562               throws IOException, XNIException;
 1563   
 1564       } // interface Dispatcher
 1565   
 1566       /**
 1567        * Dispatcher to handle content scanning. The dispatch method is a
             * state machine driven by fScannerState. The protected hook
             * methods at the end of this class exist so that a subclass can
             * change how DOCTYPE, the root element, an element depth of zero
             * and the end of file are handled.
 1568        *
 1569        * @author Andy Clark, IBM
 1570        * @author Eric Ye, IBM
 1571        */
 1572       protected class FragmentContentDispatcher
 1573           implements Dispatcher {
 1574   
 1575           //
 1576           // Dispatcher methods
 1577           //
 1578   
 1579           /** 
 1580            * Dispatch an XML "event".
                 * <p>
                 * Each pass through the outer loop handles the current value
                 * of fScannerState. The loop repeats while
                 * <code>complete</code> is true or while a case has asked for
                 * another pass, so with <code>complete</code> set to true the
                 * method only ends through one of its return statements or
                 * through an exception.
 1581            *
 1582            * @param complete True if this dispatcher is intended to scan
 1583            *                 and dispatch as much as possible.
 1584            *
 1585            * @return True if there is more to dispatch either from this
 1586            *          or another dispatcher. False is returned after an
                 *          encoding error has been reported or after the end
                 *          of file hook has run.
 1587            *
 1588            * @throws IOException  Thrown on i/o error.
 1589            * @throws XNIException Thrown on parse error.
 1590            */
 1591           public boolean dispatch(boolean complete) 
 1592               throws IOException, XNIException {
 1593               try {
                        // Set by a case that only changed the scanner state and
                        // wants the new state handled in the same call.
 1594                   boolean again;
 1595                   do {
 1596                       again = false;
 1597                       switch (fScannerState) {
 1598                           case SCANNER_STATE_CONTENT: {
                                    // '<' and '&' right at the current position
                                    // are consumed here and handled on the next pass.
 1599                               if (fEntityScanner.skipChar('<')) {
 1600                                   setScannerState(SCANNER_STATE_START_OF_MARKUP);
 1601                                   again = true;
 1602                               }
 1603                               else if (fEntityScanner.skipChar('&')) {
 1604                                   setScannerState(SCANNER_STATE_REFERENCE);
 1605                                   again = true;
 1606                               }
 1607                               else {
                                        // Character data. When "complete" is true this
                                        // inner loop keeps scanning content until a '<'
                                        // or '&' is reached; otherwise it runs once.
                                        // NOTE(review): scanContent() is defined
                                        // elsewhere; the scanChar() calls below suggest
                                        // it returns the next character without
                                        // consuming it -- confirm.
 1608                                   do {
 1609                                       int c = scanContent();
 1610                                       if (c == '<') {
 1611                                           fEntityScanner.scanChar();
 1612                                           setScannerState(SCANNER_STATE_START_OF_MARKUP);
 1613                                           break;
 1614                                       }
 1615                                       else if (c == '&') {
 1616                                           fEntityScanner.scanChar();
 1617                                           setScannerState(SCANNER_STATE_REFERENCE);
 1618                                           break;
 1619                                       }
 1620                                       else if (c != -1 && isInvalidLiteral(c)) {
 1621                                           if (XMLChar.isHighSurrogate(c)) {
 1622                                               // special case: surrogates
 1623                                               fStringBuffer.clear();
 1624                                               if (scanSurrogates(fStringBuffer)) {
 1625                                                   // call handler
 1626                                                   if (fDocumentHandler != null) {
 1627                                                       fDocumentHandler.characters(fStringBuffer, null);
 1628                                                   }
 1629                                               }
 1630                                           }
 1631                                           else {
                                                    // Report the offending character as a hex
                                                    // code, then consume it.
 1632                                               reportFatalError("InvalidCharInContent",
 1633                                                                new Object[] {
 1634                                                   Integer.toString(c, 16)});
 1635                                               fEntityScanner.scanChar();
 1636                                           }
 1637                                       }
 1638                                   } while (complete);
 1639                               }
 1640                               break;
 1641                           }
 1642                           case SCANNER_STATE_START_OF_MARKUP: {
                                    // A '<' has been consumed; the next characters
                                    // decide what kind of markup this is.
 1643                               fMarkupDepth++;
 1644                               if (fEntityScanner.skipChar('/')) {
                                        // End tag. If scanEndElement() returns 0 the
                                        // hook may ask to hand over to another dispatcher.
 1645                                   if (scanEndElement() == 0) {
 1646                                       if (elementDepthIsZeroHook()) {
 1647                                           return true;
 1648                                       }
 1649                                   }
 1650                                   setScannerState(SCANNER_STATE_CONTENT);
 1651                               }
 1652                               else if (isValidNameStartChar(fEntityScanner.peekChar())) {
 1653                                   scanStartElement();
 1654                                   setScannerState(SCANNER_STATE_CONTENT);
 1655                               }
 1656                               else if (fEntityScanner.skipChar('!')) {
 1657                                   if (fEntityScanner.skipChar('-')) {
                                            // "<!-" must be followed by a second '-'. The
                                            // error is reported, but the comment state is
                                            // entered either way.
 1658                                       if (!fEntityScanner.skipChar('-')) {
 1659                                           reportFatalError("InvalidCommentStart",
 1660                                                            null);
 1661                                       }
 1662                                       setScannerState(SCANNER_STATE_COMMENT);
 1663                                       again = true;
 1664                                   }
 1665                                   else if (fEntityScanner.skipString("[CDATA[")) {
 1666                                       setScannerState(SCANNER_STATE_CDATA);
 1667                                       again = true;
 1668                                   }
                                        // NOTE: on this last path this method does not
                                        //       call setScannerState itself, whether or
                                        //       not the hook recognized a DOCTYPE.
 1669                                   else if (!scanForDoctypeHook()) {
 1670                                       reportFatalError("MarkupNotRecognizedInContent",
 1671                                                        null);
 1672                                   }
 1673                               }
 1674                               else if (fEntityScanner.skipChar('?')) {
 1675                                   setScannerState(SCANNER_STATE_PI);
 1676                                   again = true;
 1677                               }
 1678                               else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
 1679                                   scanStartElement();
 1680                                   setScannerState(SCANNER_STATE_CONTENT);
 1681                               }
 1682                               else {
 1683                                   reportFatalError("MarkupNotRecognizedInContent",
 1684                                                    null);
 1685                                   setScannerState(SCANNER_STATE_CONTENT);                 
 1686                               }
 1687                               break;
 1688                           }
 1689                           case SCANNER_STATE_COMMENT: {
 1690                               scanComment();
 1691                               setScannerState(SCANNER_STATE_CONTENT);
 1692                               break;  
 1693                           }
 1694                           case SCANNER_STATE_PI: {
 1695                               scanPI();
 1696                               setScannerState(SCANNER_STATE_CONTENT);
 1697                               break;  
 1698                           }
 1699                           case SCANNER_STATE_CDATA: {
 1700                               scanCDATASection(complete);
 1701                               setScannerState(SCANNER_STATE_CONTENT);
 1702                               break;
 1703                           }
 1704                           case SCANNER_STATE_REFERENCE: {
 1705                               fMarkupDepth++;
 1706                               // NOTE: We need to set the state beforehand
 1707                               //       because the XMLEntityHandler#startEntity
 1708                               //       callback could set the state to
 1709                               //       SCANNER_STATE_TEXT_DECL and we don't want
 1710                               //       to override that scanner state.
 1711                               setScannerState(SCANNER_STATE_CONTENT);
                                    // "&#" starts a character reference; anything
                                    // else after '&' is scanned as an entity reference.
 1712                               if (fEntityScanner.skipChar('#')) {
 1713                                   scanCharReference();
 1714                               }
 1715                               else {
 1716                                   scanEntityReference();
 1717                               }
 1718                               break;
 1719                           }
 1720                           case SCANNER_STATE_TEXT_DECL: {
 1721                               // scan text decl
                                    // If the input does not start with "<?xml",
                                    // nothing is scanned here and only the two
                                    // statements after this if are executed.
 1722                               if (fEntityScanner.skipString("<?xml")) {
 1723                                   fMarkupDepth++;
 1724                                   // NOTE: special case where entity starts with a PI
 1725                                   //       whose name starts with "xml" (e.g. "xmlfoo")
 1726                                   if (isValidNameChar(fEntityScanner.peekChar())) {
                                            // Rebuild the full PI target: "xml" plus the
                                            // remaining name characters.
 1727                                       fStringBuffer.clear();
 1728                                       fStringBuffer.append("xml");
 1729                                       if (fNamespaces) {
 1730                                           while (isValidNCName(fEntityScanner.peekChar())) {
 1731                                               fStringBuffer.append((char)fEntityScanner.scanChar());
 1732                                           }
 1733                                       }
 1734                                       else {
 1735                                           while (isValidNameChar(fEntityScanner.peekChar())) {
 1736                                               fStringBuffer.append((char)fEntityScanner.scanChar());
 1737                                           }
 1738                                       }
 1739                                       String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
 1740                                       scanPIData(target, fTempString);
 1741                                   }
 1742                   
 1743                                   // standard text declaration
 1744                                   else {
 1745                                       scanXMLDeclOrTextDecl(true);
 1746                                   }
 1747                               }
 1748                               // now that we've straightened out the readers, we can read in chunks:
 1749                               fEntityManager.fCurrentEntity.mayReadChunks = true;
 1750                               setScannerState(SCANNER_STATE_CONTENT);
 1751                               break;
 1752                           }
 1753                           case SCANNER_STATE_ROOT_ELEMENT: {
                                    // The hook may ask to hand over to another
                                    // dispatcher; otherwise continue with content.
 1754                               if (scanRootElementHook()) {
 1755                                   return true;
 1756                               }
 1757                               setScannerState(SCANNER_STATE_CONTENT);
 1758                               break;
 1759                           }
 1760                           case SCANNER_STATE_DOCTYPE: {
 1761                               reportFatalError("DoctypeIllegalInContent",
 1762                                                null);
 1763                               setScannerState(SCANNER_STATE_CONTENT);
                                    // Last case of the switch, so no break is needed.
 1764                           }
 1765                       }
 1766                   } while (complete || again);
 1767               }
 1768               // encoding errors
 1769               catch (MalformedByteSequenceException e) {
 1770                   fErrorReporter.reportError(e.getDomain(), e.getKey(), 
 1771                       e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
 1772                   return false;
 1773               }
 1774               catch (CharConversionException e) {
 1775                   fErrorReporter.reportError(
 1776                           XMLMessageFormatter.XML_DOMAIN,
 1777                           "CharConversionFailure",
 1778                           null,
 1779                           XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
 1780                   return false;
 1781               }
 1782               // premature end of file
 1783               catch (EOFException e) {
 1784                   endOfFileHook(e);
 1785                   return false;
 1786               }
 1787   
                    // Reached only when the loop ended on its own, i.e. when
                    // both "complete" and "again" were false.
 1788               return true;
 1789   
 1790           } // dispatch(boolean):boolean
 1791   
 1792           //
 1793           // Protected methods
 1794           //
 1795   
 1796           // hooks
 1797   
 1798           // NOTE: These hook methods are added so that the full document
 1799           //       scanner can share the majority of code with this class.
 1800   
 1801           /** 
 1802            * Scan for DOCTYPE hook. This method is a hook for subclasses
 1803            * to add code to handle scanning for the "DOCTYPE" string
 1804            * after the string "<!" has been scanned.
 1805            * 
 1806            * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
 1807            *          was not scanned. This default implementation always
                 *          returns false.
 1808            */
 1809           protected boolean scanForDoctypeHook() 
 1810               throws IOException, XNIException {
 1811               return false;
 1812           } // scanForDoctypeHook():boolean
 1813   
 1814           /** 
 1815            * Element depth is zero. This method is a hook for subclasses
 1816            * to add code to handle when the element depth hits zero. When
 1817            * scanning a document fragment, an element depth of zero is
 1818            * normal. However, when scanning a full XML document, the
 1819            * scanner must handle the trailing miscellaneous section of
 1820            * the document after the end of the document's root element.
 1821            *
 1822            * @return True if the caller should stop and return true which
 1823            *          allows the scanner to switch to a new scanning 
 1824            *          dispatcher. A return value of false indicates that
 1825            *          the content dispatcher should continue as normal.
                 *          This default implementation always returns false.
 1826            */
 1827           protected boolean elementDepthIsZeroHook()
 1828               throws IOException, XNIException {
 1829               return false;
 1830           } // elementDepthIsZeroHook():boolean
 1831   
 1832           /**
 1833            * Scan for root element hook. This method is a hook for
 1834            * subclasses to add code that handles scanning for the root
 1835            * element. When scanning a document fragment, there is no
 1836            * "root" element. However, when scanning a full XML document,
 1837            * the scanner must handle the root element specially.
 1838            *
 1839            * @return True if the caller should stop and return true which
 1840            *          allows the scanner to switch to a new scanning 
 1841            *          dispatcher. A return value of false indicates that
 1842            *          the content dispatcher should continue as normal.
                 *          This default implementation always returns false.
 1843            */
 1844           protected boolean scanRootElementHook()
 1845               throws IOException, XNIException {
 1846               return false;
 1847           } // scanRootElementHook():boolean
 1848   
 1849           /**
 1850            * End of file hook. This method is a hook for subclasses to
 1851            * add code that handles the end of file. The end of file in
 1852            * a document fragment is OK if the markup depth is zero.
 1853            * However, when scanning a full XML document, an end of file
 1854            * is always premature.
                 *
                 * @param e The end of file exception caught by dispatch. This
                 *          default implementation does not use it.
 1855            */
 1856           protected void endOfFileHook(EOFException e) 
 1857               throws IOException, XNIException {
 1858   
 1859               // NOTE: An end of file is only an error if we were
 1860               //       in the middle of scanning some markup. -Ac
 1861               if (fMarkupDepth != 0) {
 1862                   reportFatalError("PrematureEOF", null);
 1863               }
 1864   
 1865           } // endOfFileHook()
 1866   
 1867       } // class FragmentContentDispatcher
 1868   
 1869   } // class XMLDocumentFragmentScannerImpl

Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]