Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl;
   19   
   20   import java.io.IOException;
   21   
   22   import org.apache.xerces.impl.msg.XMLMessageFormatter;
   23   import org.apache.xerces.util.SymbolTable;
   24   import org.apache.xerces.util.XMLChar;
   25   import org.apache.xerces.util.XMLResourceIdentifierImpl;
   26   import org.apache.xerces.util.XMLStringBuffer;
   27   import org.apache.xerces.xni.Augmentations;
   28   import org.apache.xerces.xni.XMLResourceIdentifier;
   29   import org.apache.xerces.xni.XMLString;
   30   import org.apache.xerces.xni.XNIException;
   31   import org.apache.xerces.xni.parser.XMLComponent;
   32   import org.apache.xerces.xni.parser.XMLComponentManager;
   33   import org.apache.xerces.xni.parser.XMLConfigurationException;
   34   
   35   /**
   36    * This class is responsible for holding scanning methods common to
   37    * scanning the XML document structure and content as well as the DTD
   38    * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
   39    * from this base class.
   40    *
   41    * <p>
   42    * This component requires the following features and properties from the
   43    * component manager that uses it:
   44    * <ul>
   45    *  <li>http://xml.org/sax/features/validation</li> 
   46    *  <li>http://xml.org/sax/features/namespaces</li>
   47    *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
   48    *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
   49    *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
   50    *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
   51    * </ul>
   52    * 
   53    * @xerces.internal
   54    *
   55    * @author Andy Clark, IBM
   56    * @author Arnaud  Le Hors, IBM
   57    * @author Eric Ye, IBM
   58    *
   59    * @version $Id: XMLScanner.java 572055 2007-09-02 17:55:43Z mrglavas $
   60    */
   61   public abstract class XMLScanner 
   62       implements XMLComponent {
   63   
   64       //
   65       // Constants
   66       //
   67   
   68       // feature identifiers
   69   
   70       /** Feature identifier: validation. */
   71       protected static final String VALIDATION =
   72           Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
   73   
   74       /** Feature identifier: namespaces. */
   75       protected static final String NAMESPACES = 
   76           Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
   77   
   78       /** Feature identifier: notify character references. */
   79       protected static final String NOTIFY_CHAR_REFS =
   80           Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
   81   	
   82   	protected static final String PARSER_SETTINGS = 
   83   				Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
   84   
   85       // property identifiers
   86   
   87       /** Property identifier: symbol table. */
   88       protected static final String SYMBOL_TABLE = 
   89           Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
   90   
   91       /** Property identifier: error reporter. */
   92       protected static final String ERROR_REPORTER = 
   93           Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
   94   
   95       /** Property identifier: entity manager. */
   96       protected static final String ENTITY_MANAGER = 
   97           Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
   98   
   99       // debugging
  100   
  101       /** Debug attribute normalization. */
  102       protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
  103   
  104       //
  105       // Data
  106       //
  107       
  108   
  109       // features
  110   
  111       /** 
  112        * Validation. This feature identifier is:
  113        * http://xml.org/sax/features/validation
  114        */
  115       protected boolean fValidation = false;
  116       
  117       /** Namespaces. */
  118       protected boolean fNamespaces;
  119   
  120       /** Character references notification. */
  121       protected boolean fNotifyCharRefs = false;
  122       
  123       /** Internal parser-settings feature */
  124   	protected boolean fParserSettings = true;
  125   	
  126       // properties
  127   
  128       /** Symbol table. */
  129       protected SymbolTable fSymbolTable;
  130   
  131       /** Error reporter. */
  132       protected XMLErrorReporter fErrorReporter;
  133   
  134       /** Entity manager. */
  135       protected XMLEntityManager fEntityManager;
  136   
  137       // protected data
  138   
  139       /** Entity scanner. */
  140       protected XMLEntityScanner fEntityScanner;
  141   
  142       /** Entity depth. */
  143       protected int fEntityDepth;
  144   
  145       /** Literal value of the last character reference scanned. */
  146       protected String fCharRefLiteral = null;
  147   
  148       /** Scanning attribute. */
  149       protected boolean fScanningAttribute;
  150   
  151       /** Report entity boundary. */
  152       protected boolean fReportEntity;
  153   
  154       // symbols
  155   
  156       /** Symbol: "version". */
  157       protected final static String fVersionSymbol = "version".intern();
  158   
  159       /** Symbol: "encoding". */
  160       protected final static String fEncodingSymbol = "encoding".intern();
  161   
  162       /** Symbol: "standalone". */
  163       protected final static String fStandaloneSymbol = "standalone".intern();
  164   
  165       /** Symbol: "amp". */
  166       protected final static String fAmpSymbol = "amp".intern();
  167   
  168       /** Symbol: "lt". */
  169       protected final static String fLtSymbol = "lt".intern();
  170   
  171       /** Symbol: "gt". */
  172       protected final static String fGtSymbol = "gt".intern();
  173   
  174       /** Symbol: "quot". */
  175       protected final static String fQuotSymbol = "quot".intern();
  176   
  177       /** Symbol: "apos". */
  178       protected final static String fAposSymbol = "apos".intern();
  179   
  180       // temporary variables
  181   
  182       // NOTE: These objects are private to help prevent accidental modification
  183       //       of values by a subclass. If they were protected *and* a subclass
  184       //       modified the values, it would be difficult to track down the real
  185       //       cause of the bug. By making these private, we avoid this 
  186       //       possibility.
  187   
  188       /** String. */
  189       private final XMLString fString = new XMLString();
  190   
  191       /** String buffer. */
  192       private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
  193   
  194       /** String buffer. */
  195       private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
  196   
  197       /** String buffer. */
  198       private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
  199   
  200       // temporary location for Resource identification information.
  201       protected final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
  202   
  203       //
  204       // XMLComponent methods
  205       //
  206   
  207       /**
  208        * Resets this scanner using the given component manager.
  209        * 
  210        * @param componentManager The component manager.
  211        *
  212        * @throws XMLConfigurationException Thrown if required features and
  213        *                      properties cannot be found.
  214        */
  215       public void reset(XMLComponentManager componentManager)
  216           throws XMLConfigurationException {
  217   
  218   		try {
  219   			fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
  220   		} catch (XMLConfigurationException e) {
  221   			fParserSettings = true;
  222   		}
  223   
  224   		if (!fParserSettings) {
  225   			// parser settings have not been changed
  226   			init();
  227   			return;
  228   		}
  229   
  230           // Xerces properties
  231           fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
  232           fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
  233           fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
  234   
  235           // sax features
  236           try {
  237               fValidation = componentManager.getFeature(VALIDATION);
  238           }
  239           catch (XMLConfigurationException e) {
  240               fValidation = false;
  241           }
  242           try {
  243               fNamespaces = componentManager.getFeature(NAMESPACES);
  244           }
  245           catch (XMLConfigurationException e) {
  246               fNamespaces = true;
  247           }
  248           try {
  249               fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
  250           }
  251           catch (XMLConfigurationException e) {
  252               fNotifyCharRefs = false;
  253           }
  254           
  255           init();
  256   
  257       } // reset(XMLComponentManager)
  258   
  259       /**
  260        * Sets the value of a property during parsing.
  261        * 
  262        * @param propertyId The identifier of the property being set.
  263        * @param value The new value of the property.
  264        */
  265       public void setProperty(String propertyId, Object value)
  266           throws XMLConfigurationException {
  267           
  268           // Xerces properties
  269           if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
  270           	final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
  271           	
  272               if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 
  273                   propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
  274                   fSymbolTable = (SymbolTable)value;
  275               }
  276               else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 
  277                   propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
  278                   fErrorReporter = (XMLErrorReporter)value;
  279               }
  280               else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 
  281                   propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
  282                   fEntityManager = (XMLEntityManager)value;
  283               }
  284           }
  285   
  286       } // setProperty(String,Object)
  287   
  288       /*
  289        * Sets the feature of the scanner.
  290        */
  291       public void setFeature(String featureId, boolean value)
  292           throws XMLConfigurationException {
  293               
  294           if (VALIDATION.equals(featureId)) {
  295               fValidation = value;
  296           } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
  297               fNotifyCharRefs = value;
  298           }
  299       }
  300       
  301       /*
  302        * Gets the state of the feature of the scanner.
  303        */
  304       public boolean getFeature(String featureId)
  305           throws XMLConfigurationException {
  306               
  307           if (VALIDATION.equals(featureId)) {
  308               return fValidation;
  309           } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
  310               return fNotifyCharRefs;
  311           }
  312           throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
  313       }
  314       
  315       //
  316       // Protected methods
  317       //
  318   
  319       // anybody calling this had better have set the SymbolTable!
  320       protected void reset() {
  321           init();
  322   
  323           // DTD preparsing defaults:
  324           fValidation = true;
  325           fNotifyCharRefs = false;
  326   
  327       }
  328   
  329       // common scanning methods
  330   
  331       /**
  332        * Scans an XML or text declaration.
  333        * <p>
  334        * <pre>
  335        * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  336        * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  337        * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
  338        * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  339        * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  340        *                 | ('"' ('yes' | 'no') '"'))
  341        *
  342        * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  343        * </pre>
  344        *
  345        * @param scanningTextDecl True if a text declaration is to
  346        *                         be scanned instead of an XML
  347        *                         declaration.
  348        * @param pseudoAttributeValues An array of size 3 to return the version,
  349        *                         encoding and standalone pseudo attribute values
  350        *                         (in that order).
  351        *
  352        * <strong>Note:</strong> This method uses fString, anything in it
  353        * at the time of calling is lost.
  354        */
  355       protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  356                                            String[] pseudoAttributeValues) 
  357           throws IOException, XNIException {
  358   
  359           // pseudo-attribute values
  360           String version = null;
  361           String encoding = null;
  362           String standalone = null;
  363   
  364           // scan pseudo-attributes
  365           final int STATE_VERSION = 0;
  366           final int STATE_ENCODING = 1;
  367           final int STATE_STANDALONE = 2;
  368           final int STATE_DONE = 3;
  369           int state = STATE_VERSION;
  370   
  371           boolean dataFoundForTarget = false;
  372           boolean sawSpace = fEntityScanner.skipDeclSpaces();
  373           // since pseudoattributes are *not* attributes,
  374           // their quotes don't need to be preserved in external parameter entities.
  375           // the XMLEntityScanner#scanLiteral method will continue to
  376           // emit -1 in such cases when it finds a quote; this is
  377           // fine for other methods that parse scanned entities,
  378           // but not for the scanning of pseudoattributes.  So,
  379           // temporarily, we must mark the current entity as not being "literal"
  380           XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
  381           boolean currLiteral = currEnt.literal;
  382           currEnt.literal = false;
  383           while (fEntityScanner.peekChar() != '?') {
  384               dataFoundForTarget = true;
  385               String name = scanPseudoAttribute(scanningTextDecl, fString);
  386               switch (state) {
  387                   case STATE_VERSION: {
  388                       if (name == fVersionSymbol) {
  389                           if (!sawSpace) {
  390                               reportFatalError(scanningTextDecl
  391                                          ? "SpaceRequiredBeforeVersionInTextDecl"
  392                                          : "SpaceRequiredBeforeVersionInXMLDecl",
  393                                                null);
  394                           }
  395                           version = fString.toString();
  396                           state = STATE_ENCODING;
  397                           if (!versionSupported(version)) {
  398                               reportFatalError(getVersionNotSupportedKey(), 
  399                                                new Object[]{version});
  400                           }
  401                       }
  402                       else if (name == fEncodingSymbol) {
  403                           if (!scanningTextDecl) {
  404                               reportFatalError("VersionInfoRequired", null);
  405                           }
  406                           if (!sawSpace) {
  407                               reportFatalError(scanningTextDecl
  408                                         ? "SpaceRequiredBeforeEncodingInTextDecl"
  409                                         : "SpaceRequiredBeforeEncodingInXMLDecl",
  410                                                null);
  411                           }
  412                           encoding = fString.toString();
  413                           state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  414                       }
  415                       else {
  416                           if (scanningTextDecl) {
  417                               reportFatalError("EncodingDeclRequired", null);
  418                           }
  419                           else {
  420                               reportFatalError("VersionInfoRequired", null);
  421                           }
  422                       }
  423                       break;
  424                   }
  425                   case STATE_ENCODING: {
  426                       if (name == fEncodingSymbol) {
  427                           if (!sawSpace) {
  428                               reportFatalError(scanningTextDecl
  429                                         ? "SpaceRequiredBeforeEncodingInTextDecl"
  430                                         : "SpaceRequiredBeforeEncodingInXMLDecl",
  431                                                null);
  432                           }
  433                           encoding = fString.toString();
  434                           state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  435                           // TODO: check encoding name; set encoding on
  436                           //       entity scanner
  437                       }
  438                       else if (!scanningTextDecl && name == fStandaloneSymbol) {
  439                           if (!sawSpace) {
  440                               reportFatalError("SpaceRequiredBeforeStandalone",
  441                                                null);
  442                           }
  443                           standalone = fString.toString();
  444                           state = STATE_DONE;
  445                           if (!standalone.equals("yes") && !standalone.equals("no")) {
  446                               reportFatalError("SDDeclInvalid", new Object[] {standalone});
  447                           }
  448                       }
  449                       else {
  450                           reportFatalError("EncodingDeclRequired", null);
  451                       }
  452                       break;
  453                   }
  454                   case STATE_STANDALONE: {
  455                       if (name == fStandaloneSymbol) {
  456                           if (!sawSpace) {
  457                               reportFatalError("SpaceRequiredBeforeStandalone",
  458                                                null);
  459                           }
  460                           standalone = fString.toString();
  461                           state = STATE_DONE;
  462                           if (!standalone.equals("yes") && !standalone.equals("no")) {
  463                               reportFatalError("SDDeclInvalid", new Object[] {standalone});
  464                           }
  465                       }
  466                       else {
  467                           reportFatalError("EncodingDeclRequired", null);
  468                       }
  469                       break;
  470                   }
  471                   default: {
  472                       reportFatalError("NoMorePseudoAttributes", null);
  473                   }
  474               }
  475               sawSpace = fEntityScanner.skipDeclSpaces();
  476           }
  477           // restore original literal value
  478           if(currLiteral) 
  479               currEnt.literal = true;
  480           // REVISIT: should we remove this error reporting?
  481           if (scanningTextDecl && state != STATE_DONE) {
  482               reportFatalError("MorePseudoAttributes", null);
  483           }
  484           
  485           // If there is no data in the xml or text decl then we fail to report error 
  486           // for version or encoding info above.
  487           if (scanningTextDecl) {
  488               if (!dataFoundForTarget && encoding == null) {
  489                   reportFatalError("EncodingDeclRequired", null);
  490               }
  491           }
  492           else {
  493               if (!dataFoundForTarget && version == null) {
  494                   reportFatalError("VersionInfoRequired", null);
  495               }
  496           }
  497   
  498           // end
  499           if (!fEntityScanner.skipChar('?')) {
  500               reportFatalError("XMLDeclUnterminated", null);
  501           }
  502           if (!fEntityScanner.skipChar('>')) {
  503               reportFatalError("XMLDeclUnterminated", null);
  504   
  505           }
  506           
  507           // fill in return array
  508           pseudoAttributeValues[0] = version;
  509           pseudoAttributeValues[1] = encoding;
  510           pseudoAttributeValues[2] = standalone;
  511   
  512       } // scanXMLDeclOrTextDecl(boolean)
  513   
  514       /**
  515        * Scans a pseudo attribute.
  516        *
  517        * @param scanningTextDecl True if scanning this pseudo-attribute for a
  518        *                         TextDecl; false if scanning XMLDecl. This 
  519        *                         flag is needed to report the correct type of
  520        *                         error.
  521        * @param value            The string to fill in with the attribute 
  522        *                         value.
  523        *
  524        * @return The name of the attribute
  525        *
  526        * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
  527        * at the time of calling is lost.
  528        */
  529       public String scanPseudoAttribute(boolean scanningTextDecl, 
  530                                         XMLString value) 
  531           throws IOException, XNIException {
  532   
  533           // REVISIT: This method is used for generic scanning of 
  534           // pseudo attributes, but since there are only three such
  535           // attributes: version, encoding, and standalone there are
  536           // for performant ways of scanning them. Every decl must
  537           // have a version, and in TextDecls this version must
  538           // be followed by an encoding declaration. Also the
  539           // methods we invoke on the scanners allow non-ASCII
  540           // characters to be parsed in the decls, but since
  541           // we don't even know what the actual encoding of the
  542           // document is until we scan the encoding declaration
  543           // you cannot reliably read any characters outside
  544           // of the ASCII range here. -- mrglavas
  545           String name = fEntityScanner.scanName();
  546           XMLEntityManager.print(fEntityManager.getCurrentEntity());
  547           if (name == null) {
  548               reportFatalError("PseudoAttrNameExpected", null);
  549           }
  550           fEntityScanner.skipDeclSpaces();
  551           if (!fEntityScanner.skipChar('=')) {
  552               reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
  553                                : "EqRequiredInXMLDecl", new Object[]{name});
  554           }
  555           fEntityScanner.skipDeclSpaces();
  556           int quote = fEntityScanner.peekChar();
  557           if (quote != '\'' && quote != '"') {
  558               reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
  559                                : "QuoteRequiredInXMLDecl" , new Object[]{name});
  560           }
  561           fEntityScanner.scanChar();
  562           int c = fEntityScanner.scanLiteral(quote, value);
  563           if (c != quote) {
  564               fStringBuffer2.clear();
  565               do {
  566                   fStringBuffer2.append(value);
  567                   if (c != -1) {
  568                       if (c == '&' || c == '%' || c == '<' || c == ']') {
  569                           fStringBuffer2.append((char)fEntityScanner.scanChar());
  570                       }
  571                       // REVISIT: Even if you could reliably read non-ASCII chars
  572                       // why bother scanning for surrogates here? Only ASCII chars
  573                       // match the productions in XMLDecls and TextDecls. -- mrglavas
  574                       else if (XMLChar.isHighSurrogate(c)) {
  575                           scanSurrogates(fStringBuffer2);
  576                       }
  577                       else if (isInvalidLiteral(c)) {
  578                           String key = scanningTextDecl
  579                               ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
  580                           reportFatalError(key,
  581                                          new Object[] {Integer.toString(c, 16)});
  582                           fEntityScanner.scanChar();
  583                       }
  584                   }
  585                   c = fEntityScanner.scanLiteral(quote, value);
  586               } while (c != quote);
  587               fStringBuffer2.append(value);
  588               value.setValues(fStringBuffer2);
  589           }
  590           if (!fEntityScanner.skipChar(quote)) {
  591               reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
  592                                : "CloseQuoteMissingInXMLDecl",
  593                                new Object[]{name});
  594           }
  595   
  596           // return
  597           return name;
  598   
  599       } // scanPseudoAttribute(XMLString):String
  600       
  601       /**
  602        * Scans a processing instruction.
  603        * <p>
  604        * <pre>
  605        * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  606        * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  607        * </pre>
  608        * <strong>Note:</strong> This method uses fString, anything in it
  609        * at the time of calling is lost.
  610        */
  611       protected void scanPI() throws IOException, XNIException {
  612   
  613           // target
  614           fReportEntity = false;
  615           String target = null;
  616           if(fNamespaces) {
  617               target = fEntityScanner.scanNCName();
  618           } else {
  619               target = fEntityScanner.scanName();
  620           }
  621           if (target == null) {
  622               reportFatalError("PITargetRequired", null);
  623           }
  624   
  625           // scan data
  626           scanPIData(target, fString);
  627           fReportEntity = true;
  628   
  629       } // scanPI()
  630   
  631       /**
  632        * Scans processing instruction data. This is needed to handle the situation
  633        * where a document starts with a processing instruction whose 
  634        * target name <em>starts with</em> "xml". (e.g. xmlfoo)
  635        *
  636        * <strong>Note:</strong> This method uses fStringBuffer, anything in it
  637        * at the time of calling is lost.
  638        *
  639        * @param target The PI target
  640        * @param data The string to fill in with the data
  641        */
  642       protected void scanPIData(String target, XMLString data) 
  643           throws IOException, XNIException {
  644   
  645           // check target
  646           if (target.length() == 3) {
  647               char c0 = Character.toLowerCase(target.charAt(0));
  648               char c1 = Character.toLowerCase(target.charAt(1));
  649               char c2 = Character.toLowerCase(target.charAt(2));
  650               if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
  651                   reportFatalError("ReservedPITarget", null);
  652               }
  653           }
  654   
  655           // spaces
  656           if (!fEntityScanner.skipSpaces()) {
  657               if (fEntityScanner.skipString("?>")) {
  658                   // we found the end, there is no data
  659                   data.clear();
  660                   return;
  661               }
  662               else {
  663                   if(fNamespaces && fEntityScanner.peekChar() == ':') { 
  664                       fEntityScanner.scanChar();
  665                       XMLStringBuffer colonName = new XMLStringBuffer(target);
  666                       colonName.append(":");
  667                       String str = fEntityScanner.scanName();
  668                       if (str != null)
  669                           colonName.append(str);
  670                       reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
  671                       fEntityScanner.skipSpaces();
  672                   } else {
  673                       // if there is data there should be some space
  674                       reportFatalError("SpaceRequiredInPI", null);
  675                   }
  676               }
  677           }
  678   
  679           fStringBuffer.clear();
  680           // data
  681           if (fEntityScanner.scanData("?>", fStringBuffer)) {
  682               do {
  683                   int c = fEntityScanner.peekChar();
  684                   if (c != -1) {
  685                       if (XMLChar.isHighSurrogate(c)) {
  686                           scanSurrogates(fStringBuffer);
  687                       }
  688                       else if (isInvalidLiteral(c)) {
  689                           reportFatalError("InvalidCharInPI",
  690                                            new Object[]{Integer.toHexString(c)});
  691                           fEntityScanner.scanChar();
  692                       }
  693                   }
  694               } while (fEntityScanner.scanData("?>", fStringBuffer));
  695           }
  696           data.setValues(fStringBuffer);
  697   
  698       } // scanPIData(String,XMLString)
  699   
  700       /**
  701        * Scans a comment.
  702        * <p>
  703        * <pre>
  704        * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  705        * </pre>
  706        * <p>
  707        * <strong>Note:</strong> Called after scanning past '&lt;!--'
  708        * <strong>Note:</strong> This method uses fString, anything in it
  709        * at the time of calling is lost.
  710        *
  711        * @param text The buffer to fill in with the text.
  712        */
  713       protected void scanComment(XMLStringBuffer text)
  714           throws IOException, XNIException {
  715   
  716           // text
  717           // REVISIT: handle invalid character, eof
  718           text.clear();
  719           while (fEntityScanner.scanData("--", text)) {
  720               int c = fEntityScanner.peekChar();
  721               if (c != -1) {
  722                   if (XMLChar.isHighSurrogate(c)) {
  723                       scanSurrogates(text);
  724                   }
  725                   else if (isInvalidLiteral(c)) {
  726                       reportFatalError("InvalidCharInComment",
  727                                        new Object[] { Integer.toHexString(c) }); 
  728                       fEntityScanner.scanChar();
  729                   }
  730               } 
  731           }
  732           if (!fEntityScanner.skipChar('>')) {
  733               reportFatalError("DashDashInComment", null);
  734           }
  735   
  736       } // scanComment()
  737   
  738       /**
  739        * Scans an attribute value and normalizes whitespace converting all
  740        * whitespace characters to space characters.
  741        * 
  742        * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
  743        *
  744        * @param value The XMLString to fill in with the value.
  745        * @param nonNormalizedValue The XMLString to fill in with the 
  746        *                           non-normalized value.
  747        * @param atName The name of the attribute being parsed (for error msgs).
  748        * @param checkEntities true if undeclared entities should be reported as VC violation,  
  749        *                      false if undeclared entities should be reported as WFC violation.
  750        * @param eleName The name of element to which this attribute belongs.
  751        *
  752        * @return true if the non-normalized and normalized value are the same
  753        * 
  754        * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
  755        * at the time of calling is lost.
  756        **/
  757       protected boolean scanAttributeValue(XMLString value, 
  758                                         XMLString nonNormalizedValue,
  759                                         String atName,
  760                                         boolean checkEntities,String eleName)
  761           throws IOException, XNIException
  762       {
  763           // quote
  764           int quote = fEntityScanner.peekChar();
  765           if (quote != '\'' && quote != '"') {
  766   			reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
  767           }
  768   
  769           fEntityScanner.scanChar();
  770           int entityDepth = fEntityDepth;
  771   
  772           int c = fEntityScanner.scanLiteral(quote, value);
  773           if (DEBUG_ATTR_NORMALIZATION) {
  774               System.out.println("** scanLiteral -> \""
  775                                  + value.toString() + "\"");
  776           }
  777           
  778           int fromIndex = 0;
  779           if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
  780               /** Both the non-normalized and normalized attribute values are equal. **/
  781               nonNormalizedValue.setValues(value);
  782               int cquote = fEntityScanner.scanChar();
  783               if (cquote != quote) {
  784                   reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
  785               }
  786               return true;
  787           }
  788           fStringBuffer2.clear();
  789           fStringBuffer2.append(value);
  790           normalizeWhitespace(value, fromIndex);
  791           if (DEBUG_ATTR_NORMALIZATION) {
  792               System.out.println("** normalizeWhitespace -> \""
  793                                  + value.toString() + "\"");
  794           }
  795           if (c != quote) {
  796               fScanningAttribute = true;
  797               fStringBuffer.clear();
  798               do {
  799                   fStringBuffer.append(value);
  800                   if (DEBUG_ATTR_NORMALIZATION) {
  801                       System.out.println("** value2: \""
  802                                          + fStringBuffer.toString() + "\"");
  803                   }
  804                   if (c == '&') {
  805                       fEntityScanner.skipChar('&');
  806                       if (entityDepth == fEntityDepth) {
  807                           fStringBuffer2.append('&');
  808                       }
  809                       if (fEntityScanner.skipChar('#')) {
  810                           if (entityDepth == fEntityDepth) {
  811                               fStringBuffer2.append('#');
  812                           }
  813                           int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
  814                           if (ch != -1) {
  815                               if (DEBUG_ATTR_NORMALIZATION) {
  816                                   System.out.println("** value3: \""
  817                                                      + fStringBuffer.toString()
  818                                                      + "\"");
  819                               }
  820                           }
  821                       }
  822                       else {
  823                           String entityName = fEntityScanner.scanName();
  824                           if (entityName == null) {
  825                               reportFatalError("NameRequiredInReference", null);
  826                           }
  827                           else if (entityDepth == fEntityDepth) {
  828                               fStringBuffer2.append(entityName);
  829                           }
  830                           if (!fEntityScanner.skipChar(';')) {
  831                               reportFatalError("SemicolonRequiredInReference",
  832                                                new Object []{entityName});
  833                           }
  834                           else if (entityDepth == fEntityDepth) {
  835                               fStringBuffer2.append(';');
  836                           }
  837                           if (entityName == fAmpSymbol) {
  838                               fStringBuffer.append('&');
  839                               if (DEBUG_ATTR_NORMALIZATION) {
  840                                   System.out.println("** value5: \""
  841                                                      + fStringBuffer.toString()
  842                                                      + "\"");
  843                               }
  844                           }
  845                           else if (entityName == fAposSymbol) {
  846                               fStringBuffer.append('\'');
  847                               if (DEBUG_ATTR_NORMALIZATION) {
  848                                   System.out.println("** value7: \""
  849                                                      + fStringBuffer.toString()
  850                                                      + "\"");
  851                               }
  852                           }
  853                           else if (entityName == fLtSymbol) {
  854                               fStringBuffer.append('<');
  855                               if (DEBUG_ATTR_NORMALIZATION) {
  856                                   System.out.println("** value9: \""
  857                                                      + fStringBuffer.toString()
  858                                                      + "\"");
  859                               }
  860                           }
  861                           else if (entityName == fGtSymbol) {
  862                               fStringBuffer.append('>');
  863                               if (DEBUG_ATTR_NORMALIZATION) {
  864                                   System.out.println("** valueB: \""
  865                                                      + fStringBuffer.toString()
  866                                                      + "\"");
  867                               }
  868                           }
  869                           else if (entityName == fQuotSymbol) {
  870                               fStringBuffer.append('"');
  871                               if (DEBUG_ATTR_NORMALIZATION) {
  872                                   System.out.println("** valueD: \""
  873                                                      + fStringBuffer.toString()
  874                                                      + "\"");
  875                               }
  876                           }
  877                           else {
  878                               if (fEntityManager.isExternalEntity(entityName)) {
  879                                   reportFatalError("ReferenceToExternalEntity",
  880                                                    new Object[] { entityName });
  881                               }
  882                               else {
  883                                   if (!fEntityManager.isDeclaredEntity(entityName)) {
  884                                       //WFC & VC: Entity Declared
  885                                       if (checkEntities) {
  886                                           if (fValidation) {
  887                                               fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  888                                                                          "EntityNotDeclared",
  889                                                                          new Object[]{entityName},
  890                                                                          XMLErrorReporter.SEVERITY_ERROR);
  891                                           }
  892                                       }
  893                                       else {
  894                                           reportFatalError("EntityNotDeclared",
  895                                                            new Object[]{entityName});
  896                                       }
  897                                   }
  898                                   fEntityManager.startEntity(entityName, true);
  899                               }
  900                           }
  901                       }
  902                   }
  903                   else if (c == '<') {
  904                       reportFatalError("LessthanInAttValue",
  905   									 new Object[] { eleName, atName });
  906                       fEntityScanner.scanChar();
  907                       if (entityDepth == fEntityDepth) {
  908                           fStringBuffer2.append((char)c);
  909                       }
  910                   }
  911                   else if (c == '%' || c == ']') {
  912                       fEntityScanner.scanChar();
  913                       fStringBuffer.append((char)c);
  914                       if (entityDepth == fEntityDepth) {
  915                           fStringBuffer2.append((char)c);
  916                       }
  917                       if (DEBUG_ATTR_NORMALIZATION) {
  918                           System.out.println("** valueF: \""
  919                                              + fStringBuffer.toString() + "\"");
  920                       }
  921                   }
  922                   else if (c == '\n' || c == '\r') {
  923                       fEntityScanner.scanChar();
  924                       fStringBuffer.append(' ');
  925                       if (entityDepth == fEntityDepth) {
  926                           fStringBuffer2.append('\n');
  927                       }
  928                   }
  929                   else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  930                       fStringBuffer3.clear();
  931                       if (scanSurrogates(fStringBuffer3)) {
  932                           fStringBuffer.append(fStringBuffer3);
  933                           if (entityDepth == fEntityDepth) {
  934                               fStringBuffer2.append(fStringBuffer3);
  935                           }
  936                           if (DEBUG_ATTR_NORMALIZATION) {
  937                               System.out.println("** valueI: \""
  938                                                  + fStringBuffer.toString()
  939                                                  + "\"");
  940                           }
  941                       }
  942                   }
  943                   else if (c != -1 && isInvalidLiteral(c)) {
  944                       reportFatalError("InvalidCharInAttValue",
  945   					new Object[] {eleName, atName, Integer.toString(c, 16)});
  946                       fEntityScanner.scanChar();
  947                       if (entityDepth == fEntityDepth) {
  948                           fStringBuffer2.append((char)c);
  949                       }
  950                   }
  951                   c = fEntityScanner.scanLiteral(quote, value);
  952                   if (entityDepth == fEntityDepth) {
  953                       fStringBuffer2.append(value);
  954                   }
  955                   normalizeWhitespace(value);
  956               } while (c != quote || entityDepth != fEntityDepth);
  957               fStringBuffer.append(value);
  958               if (DEBUG_ATTR_NORMALIZATION) {
  959                   System.out.println("** valueN: \""
  960                                      + fStringBuffer.toString() + "\"");
  961               }
  962               value.setValues(fStringBuffer);
  963               fScanningAttribute = false;
  964           }
  965           nonNormalizedValue.setValues(fStringBuffer2);
  966   
  967           // quote
  968           int cquote = fEntityScanner.scanChar();
  969           if (cquote != quote) {
  970   			reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
  971           }
  972           return nonNormalizedValue.equals(value.ch, value.offset, value.length);
  973           
  974       } // scanAttributeValue()
  975   
  976   
  977       /**
  978        * Scans External ID and return the public and system IDs.
  979        *
  980        * @param identifiers An array of size 2 to return the system id,
  981        *                    and public id (in that order).
  982        * @param optionalSystemId Specifies whether the system id is optional.
  983        *
  984        * <strong>Note:</strong> This method uses fString and fStringBuffer,
  985        * anything in them at the time of calling is lost.
  986        */
  987       protected void scanExternalID(String[] identifiers,
  988                                     boolean optionalSystemId)
  989           throws IOException, XNIException {
  990   
  991           String systemId = null;
  992           String publicId = null;
  993           if (fEntityScanner.skipString("PUBLIC")) {
  994               if (!fEntityScanner.skipSpaces()) {
  995                   reportFatalError("SpaceRequiredAfterPUBLIC", null);
  996               }
  997               scanPubidLiteral(fString);
  998               publicId = fString.toString();
  999   
 1000               if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
 1001                   reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
 1002               }
 1003           }
 1004   
 1005           if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
 1006               if (publicId == null && !fEntityScanner.skipSpaces()) {
 1007                   reportFatalError("SpaceRequiredAfterSYSTEM", null);
 1008               }
 1009               int quote = fEntityScanner.peekChar();
 1010               if (quote != '\'' && quote != '"') {
 1011                   if (publicId != null && optionalSystemId) {
 1012                       // looks like we don't have any system id
 1013                       // simply return the public id
 1014                       identifiers[0] = null;
 1015                       identifiers[1] = publicId;
 1016                       return;
 1017                   }
 1018                   reportFatalError("QuoteRequiredInSystemID", null);
 1019               }
 1020               fEntityScanner.scanChar();
 1021               XMLString ident = fString;
 1022               if (fEntityScanner.scanLiteral(quote, ident) != quote) {
 1023                   fStringBuffer.clear();
 1024                   do {
 1025                       fStringBuffer.append(ident);
 1026                       int c = fEntityScanner.peekChar();
 1027                       if (XMLChar.isMarkup(c) || c == ']') {
 1028                           fStringBuffer.append((char)fEntityScanner.scanChar());
 1029                       }
 1030                   } while (fEntityScanner.scanLiteral(quote, ident) != quote);
 1031                   fStringBuffer.append(ident);
 1032                   ident = fStringBuffer;
 1033               }
 1034               systemId = ident.toString();
 1035               if (!fEntityScanner.skipChar(quote)) {
 1036                   reportFatalError("SystemIDUnterminated", null);
 1037               }
 1038           }
 1039   
 1040           // store result in array
 1041           identifiers[0] = systemId;
 1042           identifiers[1] = publicId;
 1043       }
 1044   
 1045   
 1046       /**
 1047        * Scans public ID literal.
 1048        *
 1049        * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 
 1050        * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 1051        *
 1052        * The returned string is normalized according to the following rule,
 1053        * from http://www.w3.org/TR/REC-xml#dt-pubid:
 1054        *
 1055        * Before a match is attempted, all strings of white space in the public
 1056        * identifier must be normalized to single space characters (#x20), and
 1057        * leading and trailing white space must be removed.
 1058        *
 1059        * @param literal The string to fill in with the public ID literal.
 1060        * @return True on success.
 1061        *
 1062        * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
 1063        * the time of calling is lost.
 1064        */
 1065       protected boolean scanPubidLiteral(XMLString literal)
 1066           throws IOException, XNIException
 1067       {
 1068           int quote = fEntityScanner.scanChar();
 1069           if (quote != '\'' && quote != '"') {
 1070               reportFatalError("QuoteRequiredInPublicID", null);
 1071               return false;
 1072           }
 1073   
 1074           fStringBuffer.clear();
 1075           // skip leading whitespace
 1076           boolean skipSpace = true;
 1077           boolean dataok = true;
 1078           while (true) {
 1079               int c = fEntityScanner.scanChar();
 1080               if (c == ' ' || c == '\n' || c == '\r') {
 1081                   if (!skipSpace) {
 1082                       // take the first whitespace as a space and skip the others
 1083                       fStringBuffer.append(' ');
 1084                       skipSpace = true;
 1085                   }
 1086               }
 1087               else if (c == quote) {
 1088                   if (skipSpace) {
 1089                       // if we finished on a space let's trim it
 1090                       fStringBuffer.length--;
 1091                   }
 1092                   literal.setValues(fStringBuffer);
 1093                   break;
 1094               }
 1095               else if (XMLChar.isPubid(c)) {
 1096                   fStringBuffer.append((char)c);
 1097                   skipSpace = false;
 1098               }
 1099               else if (c == -1) {
 1100                   reportFatalError("PublicIDUnterminated", null);
 1101                   return false;
 1102               }
 1103               else {
 1104                   dataok = false;
 1105                   reportFatalError("InvalidCharInPublicID",
 1106                                    new Object[]{Integer.toHexString(c)});
 1107               }
 1108           }
 1109           return dataok;
 1110      }
 1111   
 1112   
 1113       /**
 1114        * Normalize whitespace in an XMLString converting all whitespace
 1115        * characters to space characters.
 1116        */
 1117       protected void normalizeWhitespace(XMLString value) {
 1118           int end = value.offset + value.length;
 1119           for (int i = value.offset; i < end; ++i) {
 1120               int c = value.ch[i];
 1121               // Performance: For XML 1.0 documents take advantage of 
 1122               // the fact that the only legal characters below 0x20 
 1123               // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
 1124               // already determined the well-formedness of these
 1125               // characters it is sufficient (and safe) to check
 1126               // against 0x20. -- mrglavas
 1127               if (c < 0x20) {
 1128                   value.ch[i] = ' ';
 1129               }
 1130           }
 1131       }
 1132       
 1133       /**
 1134        * Normalize whitespace in an XMLString converting all whitespace
 1135        * characters to space characters.
 1136        */
 1137       protected void normalizeWhitespace(XMLString value, int fromIndex) {
 1138           int end = value.offset + value.length;
 1139           for (int i = value.offset + fromIndex; i < end; ++i) {
 1140               int c = value.ch[i];
 1141               // Performance: For XML 1.0 documents take advantage of 
 1142               // the fact that the only legal characters below 0x20 
 1143               // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
 1144               // already determined the well-formedness of these
 1145               // characters it is sufficient (and safe) to check
 1146               // against 0x20. -- mrglavas
 1147               if (c < 0x20) {
 1148                   value.ch[i] = ' ';
 1149               }
 1150           }
 1151       }
 1152       
 1153       /**
 1154        * Checks whether this string would be unchanged by normalization.
 1155        * 
 1156        * @return -1 if the value would be unchanged by normalization,
 1157        * otherwise the index of the first whitespace character which
 1158        * would be transformed.
 1159        */
 1160       protected int isUnchangedByNormalization(XMLString value) {
 1161           int end = value.offset + value.length;
 1162           for (int i = value.offset; i < end; ++i) {
 1163               int c = value.ch[i];
 1164               // Performance: For XML 1.0 documents take advantage of 
 1165               // the fact that the only legal characters below 0x20 
 1166               // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
 1167               // already determined the well-formedness of these
 1168               // characters it is sufficient (and safe) to check
 1169               // against 0x20. -- mrglavas
 1170               if (c < 0x20) {
 1171                   return i - value.offset;
 1172               }
 1173           }
 1174           return -1;
 1175       }
 1176   
 1177       //
 1178       // XMLEntityHandler methods
 1179       //
 1180   
 1181       /**
 1182        * This method notifies of the start of an entity. The document entity
 1183        * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 
 1184        * parameter entity names start with '%'; and general entities are just
 1185        * specified by their name.
 1186        * 
 1187        * @param name     The name of the entity.
 1188        * @param identifier The resource identifier.
 1189        * @param encoding The auto-detected IANA encoding name of the entity
 1190        *                 stream. This value will be null in those situations
 1191        *                 where the entity encoding is not auto-detected (e.g.
 1192        *                 internal entities or a document entity that is
 1193        *                 parsed from a java.io.Reader).
 1194        * @param augs     Additional information that may include infoset augmentations
 1195        *
 1196        * @throws XNIException Thrown by handler to signal an error.
 1197        */
 1198       public void startEntity(String name, 
 1199                               XMLResourceIdentifier identifier,
 1200                               String encoding, Augmentations augs) throws XNIException {
 1201   
 1202           // keep track of the entity depth
 1203           fEntityDepth++;
 1204           // must reset entity scanner
 1205           fEntityScanner = fEntityManager.getEntityScanner();
 1206   
 1207       } // startEntity(String,XMLResourceIdentifier,String)
 1208   
 1209       /**
 1210        * This method notifies the end of an entity. The document entity has
 1211        * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 
 1212        * parameter entity names start with '%'; and general entities are just
 1213        * specified by their name.
 1214        * 
 1215        * @param name The name of the entity.
 1216        * @param augs Additional information that may include infoset augmentations
 1217        *
 1218        * @throws XNIException Thrown by handler to signal an error.
 1219        */
 1220       public void endEntity(String name, Augmentations augs) throws XNIException {
 1221   
 1222           // keep track of the entity depth
 1223           fEntityDepth--;
 1224   
 1225       } // endEntity(String)
 1226   
 1227       /**
 1228        * Scans a character reference and append the corresponding chars to the
 1229        * specified buffer.
 1230        *
 1231        * <p>
 1232        * <pre>
 1233        * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 1234        * </pre>
 1235        *
 1236        * <strong>Note:</strong> This method uses fStringBuffer, anything in it
 1237        * at the time of calling is lost.
 1238        *
 1239        * @param buf the character buffer to append chars to
 1240        * @param buf2 the character buffer to append non-normalized chars to
 1241        *
 1242        * @return the character value or (-1) on conversion failure
 1243        */
 1244       protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 
 1245           throws IOException, XNIException {
 1246   
 1247           // scan hexadecimal value
 1248           boolean hex = false;
 1249           if (fEntityScanner.skipChar('x')) {
 1250               if (buf2 != null) { buf2.append('x'); }
 1251               hex = true;
 1252               fStringBuffer3.clear();
 1253               boolean digit = true;
 1254               
 1255               int c = fEntityScanner.peekChar();
 1256               digit = (c >= '0' && c <= '9') ||
 1257                       (c >= 'a' && c <= 'f') ||
 1258                       (c >= 'A' && c <= 'F');
 1259               if (digit) {
 1260                   if (buf2 != null) { buf2.append((char)c); }
 1261                   fEntityScanner.scanChar();
 1262                   fStringBuffer3.append((char)c);
 1263                   
 1264                   do {
 1265                       c = fEntityScanner.peekChar();
 1266                       digit = (c >= '0' && c <= '9') ||
 1267                               (c >= 'a' && c <= 'f') ||
 1268                               (c >= 'A' && c <= 'F');
 1269                       if (digit) {
 1270                           if (buf2 != null) { buf2.append((char)c); }
 1271                           fEntityScanner.scanChar();
 1272                           fStringBuffer3.append((char)c);
 1273                       }
 1274                   } while (digit);
 1275               }
 1276               else {
 1277                   reportFatalError("HexdigitRequiredInCharRef", null);
 1278               }
 1279           }
 1280   
 1281           // scan decimal value
 1282           else {
 1283               fStringBuffer3.clear();
 1284               boolean digit = true;
 1285               
 1286               int c = fEntityScanner.peekChar();
 1287               digit = c >= '0' && c <= '9';
 1288               if (digit) {
 1289                   if (buf2 != null) { buf2.append((char)c); }
 1290                   fEntityScanner.scanChar();
 1291                   fStringBuffer3.append((char)c);
 1292                   
 1293                   do {
 1294                       c = fEntityScanner.peekChar();
 1295                       digit = c >= '0' && c <= '9';
 1296                       if (digit) {
 1297                           if (buf2 != null) { buf2.append((char)c); }
 1298                           fEntityScanner.scanChar();
 1299                           fStringBuffer3.append((char)c);
 1300                       }
 1301                   } while (digit);
 1302               }
 1303               else {
 1304                   reportFatalError("DigitRequiredInCharRef", null);
 1305               }
 1306           }
 1307   
 1308           // end
 1309           if (!fEntityScanner.skipChar(';')) {
 1310               reportFatalError("SemicolonRequiredInCharRef", null);
 1311           }
 1312           if (buf2 != null) { buf2.append(';'); }
 1313           
 1314           // convert string to number
 1315           int value = -1;
 1316           try {
 1317               value = Integer.parseInt(fStringBuffer3.toString(),
 1318                                        hex ? 16 : 10);
 1319               
 1320               // character reference must be a valid XML character
 1321               if (isInvalid(value)) {
 1322               	StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
 1323                   if (hex) errorBuf.append('x');
 1324                   errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
 1325                   reportFatalError("InvalidCharRef",
 1326                                    new Object[]{errorBuf.toString()});
 1327               }
 1328           }
 1329           catch (NumberFormatException e) {
 1330               // Conversion failed, let -1 value drop through.
 1331               // If we end up here, the character reference was invalid.
 1332               StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
 1333               if (hex) errorBuf.append('x');
 1334               errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
 1335               reportFatalError("InvalidCharRef",
 1336                                new Object[]{errorBuf.toString()});
 1337           }
 1338   
 1339           // append corresponding chars to the given buffer
 1340           if (!XMLChar.isSupplemental(value)) {
 1341               buf.append((char) value);
 1342           }
 1343           else {
 1344               // character is supplemental, split it into surrogate chars
 1345               buf.append(XMLChar.highSurrogate(value));
 1346               buf.append(XMLChar.lowSurrogate(value));
 1347           }
 1348   
 1349           // char refs notification code
 1350           if (fNotifyCharRefs && value != -1) {
 1351               String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
 1352               if (!fScanningAttribute) {
 1353                   fCharRefLiteral = literal;
 1354               }
 1355           }
 1356                   
 1357           return value;
 1358       }
 1359   
 1360       // returns true if the given character is not
 1361       // valid with respect to the version of
 1362       // XML understood by this scanner.
 1363       protected boolean isInvalid(int value) {
 1364           return (XMLChar.isInvalid(value)); 
 1365       } // isInvalid(int):  boolean
 1366   
 1367       // returns true if the given character is not
 1368       // valid or may not be used outside a character reference 
 1369       // with respect to the version of XML understood by this scanner.
 1370       protected boolean isInvalidLiteral(int value) {
 1371           return (XMLChar.isInvalid(value)); 
 1372       } // isInvalidLiteral(int):  boolean
 1373   
 1374       // returns true if the given character is 
 1375       // a valid nameChar with respect to the version of
 1376       // XML understood by this scanner.
 1377       protected boolean isValidNameChar(int value) {
 1378           return (XMLChar.isName(value)); 
 1379       } // isValidNameChar(int):  boolean
 1380   
 1381       // returns true if the given character is 
 1382       // a valid nameStartChar with respect to the version of
 1383       // XML understood by this scanner.
 1384       protected boolean isValidNameStartChar(int value) {
 1385           return (XMLChar.isNameStart(value)); 
 1386       } // isValidNameStartChar(int):  boolean
 1387       
 1388       // returns true if the given character is
 1389       // a valid NCName character with respect to the version of
 1390       // XML understood by this scanner.
 1391       protected boolean isValidNCName(int value) {
 1392           return (XMLChar.isNCName(value));
 1393       } // isValidNCName(int):  boolean
 1394       
 1395       // returns true if the given character is 
 1396       // a valid high surrogate for a nameStartChar 
 1397       // with respect to the version of XML understood 
 1398       // by this scanner.
 1399       protected boolean isValidNameStartHighSurrogate(int value) {
 1400           return false; 
 1401       } // isValidNameStartHighSurrogate(int):  boolean
 1402       
 1403       protected boolean versionSupported(String version ) {
 1404           return version.equals("1.0");
 1405       } // version Supported
 1406       
 1407       // returns the error message key for unsupported
 1408       // versions of XML with respect to the version of
 1409       // XML understood by this scanner.
 1410       protected String getVersionNotSupportedKey () {
 1411           return "VersionNotSupported";
 1412       } // getVersionNotSupportedKey: String
 1413   
 1414       /**
 1415        * Scans surrogates and append them to the specified buffer.
 1416        * <p>
 1417        * <strong>Note:</strong> This assumes the current char has already been
 1418        * identified as a high surrogate.
 1419        *
 1420        * @param buf The StringBuffer to append the read surrogates to.
 1421        * @return True if it succeeded.
 1422        */
 1423       protected boolean scanSurrogates(XMLStringBuffer buf)
 1424           throws IOException, XNIException {
 1425   
 1426           int high = fEntityScanner.scanChar();
 1427           int low = fEntityScanner.peekChar();
 1428           if (!XMLChar.isLowSurrogate(low)) {
 1429               reportFatalError("InvalidCharInContent",
 1430                                new Object[] {Integer.toString(high, 16)});
 1431               return false;
 1432           }
 1433           fEntityScanner.scanChar();
 1434   
 1435           // convert surrogates to supplemental character
 1436           int c = XMLChar.supplemental((char)high, (char)low);
 1437   
 1438           // supplemental character must be a valid XML character
 1439           if (isInvalid(c)) {
 1440               reportFatalError("InvalidCharInContent",
 1441                                new Object[]{Integer.toString(c, 16)}); 
 1442               return false;
 1443           }
 1444   
 1445           // fill in the buffer
 1446           buf.append((char)high);
 1447           buf.append((char)low);
 1448   
 1449           return true;
 1450   
 1451       } // scanSurrogates():boolean
 1452   
 1453   
 1454       /**
 1455        * Convenience function used in all XML scanners.
 1456        */
 1457       protected void reportFatalError(String msgId, Object[] args)
 1458           throws XNIException {
 1459           fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 1460                                      msgId, args,
 1461                                      XMLErrorReporter.SEVERITY_FATAL_ERROR);
 1462       }
 1463   
 1464       // private methods
 1465       private void init() { 
 1466           fEntityScanner = null;       
 1467           // initialize vars
 1468           fEntityDepth = 0;
 1469           fReportEntity = true;
 1470           fResourceIdentifier.clear();
 1471       } 
 1472   
 1473   } // class XMLScanner

Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]