Save This Page
Home » openjdk-7 » com.sun.org.apache.xerces.internal » impl » [javadoc | source]
    1   /*
    2    * Portions Copyright 2003-2006 Sun Microsystems, Inc.  All Rights Reserved.
    3    */
    4   
    5   /*
    6    * Copyright 2005 The Apache Software Foundation.
    7    *
    8    * Licensed under the Apache License, Version 2.0 (the "License");
    9    * you may not use this file except in compliance with the License.
   10    * You may obtain a copy of the License at
   11    *
   12    *      http://www.apache.org/licenses/LICENSE-2.0
   13    *
   14    * Unless required by applicable law or agreed to in writing, software
   15    * distributed under the License is distributed on an "AS IS" BASIS,
   16    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   17    * See the License for the specific language governing permissions and
   18    * limitations under the License.
   19    */
   20   
   21   package com.sun.org.apache.xerces.internal.impl;
   22   
   23   import com.sun.xml.internal.stream.Entity;
   24   import com.sun.xml.internal.stream.XMLBufferListener;
   25   import java.io.IOException;
   26   import java.io.InputStream;
   27   import java.io.InputStreamReader;
   28   import java.io.Reader;
   29   
   30   import java.util.Locale;
   31   import java.util.Vector;
   32   import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
   33   import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
   34   import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
   35   
   36   
   37   import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
   38   import com.sun.org.apache.xerces.internal.util.EncodingMap;
   39   
   40   import com.sun.org.apache.xerces.internal.util.SymbolTable;
   41   import com.sun.org.apache.xerces.internal.util.XMLChar;
   42   import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
   43   import com.sun.org.apache.xerces.internal.xni.QName;
   44   import com.sun.org.apache.xerces.internal.xni.XMLString;
   45   import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
   46   import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
   47   import com.sun.org.apache.xerces.internal.xni.*;
   48   
   49   /**
   50    * Implements the entity scanner methods.
   51    *
   52    * @author Neeraj Bajaj, Sun Microsystems
   53    * @author Andy Clark, IBM
   54    * @author Arnaud  Le Hors, IBM
   55    * @author K.Venugopal Sun Microsystems
   56    *
   57    */
   58   public class XMLEntityScanner implements XMLLocator  {
   59   
   60   
   61       protected Entity.ScannedEntity fCurrentEntity = null ; // entity being scanned; null until setCurrentEntity() supplies one
   62       protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; // replaced by setBufferSize(int)
   63   
   64       protected XMLEntityManager fEntityManager ; // assigned by the two-argument constructor and by reset(SymbolTable, XMLEntityManager, XMLErrorReporter)
   65   
   66       /** Debug switching readers for encodings. */
   67       private static final boolean DEBUG_ENCODINGS = false;
   68       /** Listeners which should know when load is being called; emptied by reset(PropertyManager) and reset(XMLComponentManager). */
   69       private Vector listeners = new Vector();
   70   
   71       public static final boolean [] VALID_NAMES = new boolean[127]; // indexed by character code 0-126; entries are switched on in the static initializer
   72   
   73       /**
   74        * Debug printing of buffer. This debugging flag works best when you
   75        * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
   76        * 64 characters.
   77        */
   78       private static final boolean DEBUG_BUFFER = false;
   79       private static final boolean DEBUG_SKIP_STRING = false;
   80   
   81       protected SymbolTable fSymbolTable = null; // supplied by the reset methods
   82       protected XMLErrorReporter fErrorReporter = null; // supplied by the reset methods
   83       int [] whiteSpaceLookup = new int[100]; // NOTE(review): not used in the part of the class visible here
   84       int whiteSpaceLen = 0; // put back to 0 by reset(PropertyManager) and reset(XMLComponentManager)
   85       boolean whiteSpaceInfoNeeded = true; // put back to true by reset(PropertyManager) and reset(XMLComponentManager)
   86   
   87       /**
   88        * Allow Java encoding names. This feature identifier is:
   89        * http://apache.org/xml/features/allow-java-encodings
   90        * Set in reset(XMLComponentManager); false when the feature query fails.
   91        */ protected boolean fAllowJavaEncodings;
   92   
   93       //Will be used only during internal subsets.
   94       //for appending data. NOTE(review): no declaration follows this comment; it appears to be left over.
   95   
   96       /** Property identifier: symbol table. */
   97       protected static final String SYMBOL_TABLE =
   98               Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
   99   
  100       /** Property identifier: error reporter. */
  101       protected static final String ERROR_REPORTER =
  102               Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
  103   
  104       /** Feature identifier: allow Java encodings. */
  105       protected static final String ALLOW_JAVA_ENCODINGS =
  106               Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
  107   
  108       protected PropertyManager fPropertyManager = null ; // NOTE(review): never assigned in the part of the class visible here
  109   
  110       boolean isExternal = false; // cached result of fCurrentEntity.isExternal(); refreshed by setCurrentEntity() for non-null entities
  111       static {
  112           // Switch on the ASCII name characters: letters, digits and '-', '.', ':', '_'.
  113           for (char upper = 'A'; upper <= 'Z'; upper++) {
  114               VALID_NAMES[upper] = true;
  115           }
  116           for (char lower = 'a'; lower <= 'z'; lower++) {
  117               VALID_NAMES[lower] = true;
  118           }
  119           for (char digit = '0'; digit <= '9'; digit++) {
  120               VALID_NAMES[digit] = true;
  121           }
  122           VALID_NAMES['-'] = true;
  123           VALID_NAMES['.'] = true;
  124           VALID_NAMES[':'] = true;
  125           VALID_NAMES['_'] = true;
  126       }
  127   
  128       //
  129       // Constructors
  130       //
  131   
  132       /** Default constructor: the body is empty, so every field keeps the initial value given at its declaration. */
  133       public XMLEntityScanner() {
  134       } // <init>()
  135   
  136   
  137       /**
  138        * Creates a scanner that is bound to the given entity manager and is
  139        * then initialised from the supplied property manager by delegating
  140        * to {@link #reset(PropertyManager)}.
  141        */
  142       public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
  143           this.fEntityManager = entityManager;
  144           this.reset(propertyManager);
  145       } // <init>(PropertyManager, XMLEntityManager)
  146   
  147   
  148       /**
  149        * Remembers the requested buffer size.
  150        * <p>
  151        * REVISIT (mrglavas): the size passed to the entity scanner was not
  152        * being kept in synch with the actual size of the buffers in each
  153        * scanned entity. If any of those buffers had been resized, the parser
  154        * could throw an ArrayIndexOutOfBoundsException for documents whose
  155        * names are longer than the current buffer size. Conceivably this
  156        * value could act as a minimum when resizing, should doubling the
  157        * buffer yield something smaller.
  158        */
  159       public void setBufferSize(int size) {
  160           this.fBufferSize = size;
  161       }
  162   
  163       /**
  164        * Re-initialises the scanner: fetches the symbol table and error reporter from the property manager, then clears the current entity, the whitespace bookkeeping and the listener list.
  165        */
  166       public void reset(PropertyManager propertyManager){
  167           this.fSymbolTable = (SymbolTable) propertyManager.getProperty(SYMBOL_TABLE);
  168           this.fErrorReporter = (XMLErrorReporter) propertyManager.getProperty(ERROR_REPORTER);
  169           this.listeners.clear();
  170           this.whiteSpaceInfoNeeded = true;
  171           this.whiteSpaceLen = 0;
  172           this.fCurrentEntity = null;
  173       }
  174   
  175       /**
  176        * Re-initialises the scanner from a component manager.
  177        * <p>
  178        * The allow-Java-encodings feature is queried first; should that query
  179        * fail with an XMLConfigurationException the feature is treated as
  180        * switched off. Afterwards the symbol table and error reporter
  181        * properties are fetched, and the current entity, the whitespace
  182        * bookkeeping and the listener list are cleared.
  183        *
  184        * @param componentManager The component manager.
  185        *
  186        * @throws XMLConfigurationException declared by the signature; only the
  187        *         feature query is guarded, so an exception from the property
  188        *         lookups is not caught here.
  189        */
  190       public void reset(XMLComponentManager componentManager)
  191       throws XMLConfigurationException {
  192           // xerces features
  193           boolean javaEncodingsAllowed;
  194           try {
  195               javaEncodingsAllowed = componentManager.getFeature(ALLOW_JAVA_ENCODINGS);
  196           } catch (XMLConfigurationException unsupported) {
  197               javaEncodingsAllowed = false;
  198           }
  199           this.fAllowJavaEncodings = javaEncodingsAllowed;
  200           // xerces properties
  201           this.fSymbolTable = (SymbolTable) componentManager.getProperty(SYMBOL_TABLE);
  202           this.fErrorReporter = (XMLErrorReporter) componentManager.getProperty(ERROR_REPORTER);
  203           this.listeners.clear();
  204           this.whiteSpaceInfoNeeded = true;
  205           this.whiteSpaceLen = 0;
  206           this.fCurrentEntity = null;
  207       } // reset(XMLComponentManager)
  208   
  209   
  210       public void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
  211               XMLErrorReporter reporter) {
  212           fCurrentEntity = null;
  213           fSymbolTable = symbolTable;
  214           fEntityManager = entityManager;
  215           fErrorReporter = reporter;
  216       }
  217   
  218       /**
  219        * Returns the XML version of the current entity, or null when no entity
  220        * is being scanned. Normally this is the value from the XML or text
  221        * declaration, or the default chosen by the parser. It may differ from
  222        * the version of the processing rules applied to the entity: an XML 1.1
  223        * document may refer to XML 1.0 entities, in which case the XML 1.1
  224        * rules govern the entire document. For a given entity the value is
  225        * only final once the XML or text declaration has been read, or once it
  226        * has been determined that there is no such declaration.
  227        */
  228       public String getXMLVersion() {
  229           final Entity.ScannedEntity entity = this.fCurrentEntity;
  230           // without a current entity there is no version to report
  231           final String version = (entity == null) ? null : entity.xmlVersion;
  232           return version;
  233       } // getXMLVersion():String
  234   
  235       void setXMLVersion(String version) { // no null check: a current entity must already be set
  236           this.fCurrentEntity.xmlVersion = version;
  237       }
  238   
  239       /**
  240        * Makes the given entity the one being scanned. For a non-null entity the external flag is cached from it; for null the cached flag keeps its previous value.
  241        * @param scannedEntity the entity to scan next, may be null
  242        */
  243       public  void setCurrentEntity(Entity.ScannedEntity scannedEntity){
  244           this.fCurrentEntity = scannedEntity;
  245           if (scannedEntity == null) {
  246               return;
  247           }
  248           this.isExternal = scannedEntity.isExternal();
  249           if (DEBUG_BUFFER) { System.out.println("Current Entity is "+scannedEntity.name); }
  250       }
  251   
  252       public  Entity.ScannedEntity getCurrentEntity(){ // null when no entity has been set
  253           return this.fCurrentEntity;
  254       }
  255       //
  256       // XMLEntityReader methods
  257       //
  258   
  259       /** Returns the expanded system identifier of the entity being scanned, or null if there is no entity or it has no location. */
  260       public String getBaseSystemId() {
  261           if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
  262               return null;
  263           }
  264           return fCurrentEntity.entityLocation.getExpandedSystemId();
  265       } // getBaseSystemId():String
  266   
  267       /** Does nothing: the supplied system identifier is discarded.
  268        * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
  269        */
  270       public void setBaseSystemId(String systemId) {
  271           // no-op: the body is empty
  272       }
  273   
  274       ///////////// Locator methods start.
  275       public int getLineNumber(){
  276           // -1 is reported when there is no current entity (e.g. the entity is closed)
  277           final Entity.ScannedEntity entity = fCurrentEntity;
  278           return (entity == null) ? -1 : entity.lineNumber;
  279       }
  280   
  281       /** Does nothing: the supplied line number is discarded.
  282        * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
  283        */
  284       public void setLineNumber(int line) {
  285           // no-op: the body is empty
  286       }
  287   
  288   
  289       public int getColumnNumber(){
  290           // -1 is reported when there is no current entity (e.g. the entity is closed)
  291           final Entity.ScannedEntity entity = fCurrentEntity;
  292           return (entity == null) ? -1 : entity.columnNumber;
  293       }
  294   
  295       /** Does nothing: the supplied column number is discarded.
  296        * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
  297        */
  298       public void setColumnNumber(int col) {
  299           // no-op: the body is empty
  300       }
  301   
  302   
  303       public int getCharacterOffset(){ // fTotalCountTillLastLoad plus the buffer position, or -1 when no entity is set
  304           return (fCurrentEntity == null) ? -1 : fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position;
  305       }
  306   
  307       /** Returns the expanded system identifier of the current entity's location, or null when there is no entity or no location. */
  308       public String getExpandedSystemId() {
  309           return (fCurrentEntity == null || fCurrentEntity.entityLocation == null) ? null : fCurrentEntity.entityLocation.getExpandedSystemId();
  310       }
  311   
  312       /** Does nothing: the supplied system identifier is discarded.
  313        * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
  314        */
  315       public void setExpandedSystemId(String systemId) {
  316           // no-op: the body is empty
  317       }
  318   
  319       /** Returns the literal system identifier of the current entity's location, or null when there is no entity or no location. */
  320       public String getLiteralSystemId() {
  321           return (fCurrentEntity == null || fCurrentEntity.entityLocation == null) ? null : fCurrentEntity.entityLocation.getLiteralSystemId();
  322       }
  323   
  324       /** Does nothing: the supplied system identifier is discarded.
  325        * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
  326        */
  327       public void setLiteralSystemId(String systemId) {
  328           // no-op: the body is empty
  329       }
  330   
  331       /** Returns the public identifier of the current entity's location, or null when there is no entity or no location. */
  332       public String getPublicId() {
  333           return (fCurrentEntity == null || fCurrentEntity.entityLocation == null) ? null : fCurrentEntity.entityLocation.getPublicId();
  334       }
  335   
  336       /** Does nothing: the supplied public identifier is discarded.
  337        * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
  338        */
  339       public void setPublicId(String publicId) {
  340           // no-op: the body is empty
  341       }
  342   
  343       ///////////////// Locator methods finished.
  344   
  345       /** Stores the given version on the entity currently being scanned; there is no null check, so an entity must be set. */
  346       public void setVersion(String version){
  347           this.fCurrentEntity.version = version;
  348       }
  349   
  350       public String getVersion(){ // version stored on the current entity; there is no null check, so an entity must be set
  351           return this.fCurrentEntity.version;
  352       }
  353   
  354       public String getEncoding(){ // encoding recorded on the current entity; null (instead of a NullPointerException) when no entity is open
  355           return (fCurrentEntity != null) ? fCurrentEntity.encoding : null;
  356       }
  357       /**
  358        * Sets the encoding of the scanner. This method is used by the
  359        * scanners if the XMLDecl or TextDecl line contains an encoding
  360        * pseudo-attribute.
  361        * <p>
  362        * <strong>Note:</strong> The underlying character reader on the
  363        * current entity will be changed to accomodate the new encoding.
  364        * However, the new encoding is ignored if the current reader was
  365        * not constructed from an input stream (e.g. an external entity
  366        * that is resolved directly to the appropriate java.io.Reader
  367        * object).
  368        *
  369        * @param encoding The IANA encoding name of the new encoding.
  370        *
  371        * @throws IOException Thrown if the new encoding is not supported.
  372        *
  373        * @see com.sun.org.apache.xerces.internal.util.EncodingMap
  374        */
  375       public void setEncoding(String encoding) throws IOException {
  376   
  377           if (DEBUG_ENCODINGS) {
  378               System.out.println("$$$ setEncoding: "+encoding);
  379           }
  380   
  381           if (fCurrentEntity.stream != null) {
  382               // if the encoding is the same, don't change the reader and
  383               // re-use the original reader used by the OneCharReader
  384               // NOTE: Besides saving an object, this overcomes deficiencies
  385               //       in the UTF-16 reader supplied with the standard Java
  386               //       distribution (up to and including 1.3). The UTF-16
  387               //       decoder buffers 8K blocks even when only asked to read
  388               //       a single char! -Ac
  389               if (fCurrentEntity.encoding == null ||
  390                       !fCurrentEntity.encoding.equals(encoding)) {
  391                   // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
  392                   // and we know the endian-ness, we shouldn't change readers.
  393                   // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
  394                   // the endian-ness from the encoding we presently have.
  395                   if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
  396                       String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
  397                       if(ENCODING.equals("UTF-16")) return;
  398                       if(ENCODING.equals("ISO-10646-UCS-4")) {
  399                           if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  400                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
  401                           } else {
  402                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
  403                           }
  404                           return;
  405                       }
  406                       if(ENCODING.equals("ISO-10646-UCS-2")) {
  407                           if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  408                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
  409                           } else {
  410                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
  411                           }
  412                           return;
  413                       }
  414                   }
  415                   // wrap a new reader around the input stream, changing
  416                   // the encoding
  417                   if (DEBUG_ENCODINGS) {
  418                       System.out.println("$$$ creating new reader from stream: "+
  419                               fCurrentEntity.stream);
  420                   }
  421                   //fCurrentEntity.stream.reset();
  422                   fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
  423                   fCurrentEntity.encoding = encoding;
  424   
  425               } else {
  426                   if (DEBUG_ENCODINGS)
  427                       System.out.println("$$$ reusing old reader on stream");
  428               }
  429           }
  430   
  431       } // setEncoding(String)
  432   
  433       /** Reports whether the entity being scanned is external, by asking the entity itself; there is no null check. */
  434       public boolean isExternal() {
  435           return this.fCurrentEntity.isExternal();
  436       } // isExternal():boolean
  437   
  438       public int getChar(int relative) throws IOException{ // character at position + relative, or -1
  439           final boolean available = arrangeCapacity(relative + 1, false);
  440           if (!available) {
  441               return -1; // arrangeCapacity returned false
  442           }
  443           return fCurrentEntity.ch[fCurrentEntity.position + relative];
  444       }//getChar()
  445   
  446       /**
  447        * Peeks at the next character on the input.
  448        * <p>
  449        * <strong>Note:</strong> The character is <em>not</em> consumed. For an
  450        * external entity a carriage return is reported as a line feed.
  451        *
  452        * @throws IOException  Thrown if i/o error occurs.
  453        * @throws EOFException Thrown on end of file.
  454        */
  455       public int peekChar() throws IOException {
  456           if (DEBUG_BUFFER) {
  457               System.out.print("(peekChar: ");
  458               print();
  459               System.out.println();
  460           }
  461   
  462           // refill the buffer once every loaded character has been consumed
  463           final boolean exhausted = fCurrentEntity.position == fCurrentEntity.count;
  464           if (exhausted) {
  465               invokeListeners(0);
  466               load(0, true);
  467           }
  468   
  469           // look at the character without advancing the position
  470           final int raw = fCurrentEntity.ch[fCurrentEntity.position];
  471           if (DEBUG_BUFFER) {
  472               System.out.print(")peekChar: ");
  473               print();
  474           }
  475           // a '\r' read from an external entity is presented as '\n'
  476           final int peeked;
  477           if (isExternal && raw == '\r') {
  478               peeked = '\n';
  479           } else {
  480               peeked = raw;
  481           }
  482           if (DEBUG_BUFFER) {
  483               System.out.println(" -> '"+(char)peeked+"'");
  484           }
  485           return peeked;
  486       } // peekChar():int
  487   
  488       /**
  489        * Returns the next character on the input, refilling the buffer first when every loaded character has been consumed.
  490        * <p>
  491        * <strong>Note:</strong> The character is consumed. A '\n', or for an external entity a '\r' with an optional following '\n', advances the line number and is returned as '\n'.
  492        *
  493        * @throws IOException  Thrown if i/o error occurs.
  494        * @throws EOFException Thrown on end of file. NOTE(review): not thrown directly in this method; presumably raised by load() - confirm.
  495        */
  496       public int scanChar() throws IOException {
  497           if (DEBUG_BUFFER) {
  498               System.out.print("(scanChar: ");
  499               print();
  500               System.out.println();
  501           }
  502   
  503           // load more characters, if needed
  504           if (fCurrentEntity.position == fCurrentEntity.count) {
  505               invokeListeners(0);
  506               load(0, true);
  507           }
  508   
  509           // scan character: read it and advance the position past it
  510           int c = fCurrentEntity.ch[fCurrentEntity.position++];
  511           if (c == '\n' ||
  512                   (c == '\r' && isExternal)) { // '\r' counts as a line break only for external entities
  513               fCurrentEntity.lineNumber++;
  514               fCurrentEntity.columnNumber = 1;
  515               if (fCurrentEntity.position == fCurrentEntity.count) { // the line break was the last buffered character
  516                   invokeListeners(1);
  517                   fCurrentEntity.ch[0] = (char)c; // keep the line break at index 0 before loading more
  518                   load(1, false); // NOTE(review): presumably fills the buffer from index 1 onward - confirm in load()
  519               }
  520               if (c == '\r' && isExternal) {
  521                   if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { // swallow a '\n' that directly follows the '\r'
  522                       fCurrentEntity.position--; // anything else is un-read again
  523                   }
  524                   c = '\n'; // report "\r" and "\r\n" as a single '\n'
  525               }
  526           }
  527   
  528           // return character that was scanned
  529           if (DEBUG_BUFFER) {
  530               System.out.print(")scanChar: ");
  531               print();
  532               System.out.println(" -> '"+(char)c+"'");
  533           }
  534           fCurrentEntity.columnNumber++; // also runs after a line break, i.e. on top of the reset to 1 above
  535           return c;
  536   
  537       } // scanChar():int
  538   
  539       /**
  540        * Returns a string matching the NMTOKEN production appearing immediately
  541        * on the input as a symbol, or null if no NMTOKEN string is present.
  542        * <p>
  543        * <strong>Note:</strong> The NMTOKEN characters are consumed.
  544        * <p>
  545        * <strong>Note:</strong> The string returned must be a symbol. The
  546        * SymbolTable can be used for this purpose.
  547        *
  548        * @throws IOException  Thrown if i/o error occurs.
  549        * @throws EOFException Thrown on end of file.
  550        *
  551        * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  552        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  553        */
  554       public String scanNmtoken() throws IOException {
  555           if (DEBUG_BUFFER) {
  556               System.out.print("(scanNmtoken: ");
  557               print();
  558               System.out.println();
  559           }
  560   
  561           // load more characters, if needed
  562           if (fCurrentEntity.position == fCurrentEntity.count) {
  563               invokeListeners(0);
  564               load(0, true);
  565           }
  566   
  567           // scan nmtoken
  568           int offset = fCurrentEntity.position; // buffer index where the token starts
  569           boolean vc = false; // whether the character under inspection is a name character
  570           char c;
  571           while (true){
  572               // codes below 127 are answered from the VALID_NAMES table, all others by XMLChar.isName
  573               c = fCurrentEntity.ch[fCurrentEntity.position];
  574               if(c < 127){
  575                   vc = VALID_NAMES[c];
  576               }else{
  577                   vc = XMLChar.isName(c);
  578               }
  579               if(!vc)break; // first non-name character ends the token; it is not consumed
  580   
  581               if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer used up while still inside the token
  582                   int length = fCurrentEntity.position - offset;
  583                   invokeListeners(length);
  584                   if (length == fCurrentEntity.fBufferSize) {
  585                       // bad luck we have to resize our buffer: double it and copy the token to its start
  586                       char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
  587                       System.arraycopy(fCurrentEntity.ch, offset,
  588                               tmp, 0, length);
  589                       fCurrentEntity.ch = tmp;
  590                       fCurrentEntity.fBufferSize *= 2;
  591                   } else { // otherwise move the partial token to the start of the same buffer
  592                       System.arraycopy(fCurrentEntity.ch, offset,
  593                               fCurrentEntity.ch, 0, length);
  594                   }
  595                   offset = 0; // the token now begins at index 0
  596                   if (load(length, false)) { // NOTE(review): meaning of the returned flag is defined by load(), not visible here
  597                       break;
  598                   }
  599               }
  600           }
  601           int length = fCurrentEntity.position - offset;
  602           fCurrentEntity.columnNumber += length;
  603   
  604           // return nmtoken: null when no name character was found at all
  605           String symbol = null;
  606           if (length > 0) {
  607               symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  608           }
  609           if (DEBUG_BUFFER) {
  610               System.out.print(")scanNmtoken: ");
  611               print();
  612               System.out.println(" -> "+String.valueOf(symbol));
  613           }
  614           return symbol;
  615   
  616       } // scanNmtoken():String
  617   
  618       /**
  619        * Returns a string matching the Name production appearing immediately
  620        * on the input as a symbol, or null if no Name string is present.
  621        * <p>
  622        * <strong>Note:</strong> The Name characters are consumed.
  623        * <p>
  624        * <strong>Note:</strong> The string returned must be a symbol. The
  625        * SymbolTable can be used for this purpose.
  626        *
  627        * @throws IOException  Thrown if i/o error occurs.
  628        * @throws EOFException Thrown on end of file.
  629        *
  630        * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  631        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  632        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
  633        */
  634       public String scanName() throws IOException {
  635           if (DEBUG_BUFFER) {
  636               System.out.print("(scanName: ");
  637               print();
  638               System.out.println();
  639           }
  640   
  641           // load more characters, if needed
  642           if (fCurrentEntity.position == fCurrentEntity.count) {
  643               invokeListeners(0);
  644               load(0, true);
  645           }
  646   
  647           // scan name
  648           int offset = fCurrentEntity.position; // buffer index where the name starts
  649           if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { // nothing is consumed unless the first character may start a name
  650               if (++fCurrentEntity.position == fCurrentEntity.count) { // the start character was the last buffered one
  651                   invokeListeners(1);
  652                   fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; // keep it at index 0 before loading more
  653                   offset = 0;
  654                   if (load(1, false)) { // NOTE(review): meaning of the returned flag is defined by load(), not visible here
  655                       fCurrentEntity.columnNumber++;
  656                       String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); // the name is that single character
  657   
  658                       if (DEBUG_BUFFER) {
  659                           System.out.print(")scanName: ");
  660                           print();
  661                           System.out.println(" -> "+String.valueOf(symbol));
  662                       }
  663                       return symbol;
  664                   }
  665               }
  666               boolean vc =false; // whether the character under inspection is a name character
  667               while (true ){
  668                   // codes below 127 are answered from the VALID_NAMES table, all others by XMLChar.isName
  669                   char c = fCurrentEntity.ch[fCurrentEntity.position];
  670                   if(c < 127){
  671                       vc = VALID_NAMES[c];
  672                   }else{
  673                       vc = XMLChar.isName(c);
  674                   }
  675                   if(!vc)break; // first non-name character ends the name; it is not consumed
  676                   if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer used up while still inside the name
  677                       int length = fCurrentEntity.position - offset;
  678                       invokeListeners(length);
  679                       if (length == fCurrentEntity.fBufferSize) {
  680                           // bad luck we have to resize our buffer: double it and copy the name to its start
  681                           char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
  682                           System.arraycopy(fCurrentEntity.ch, offset,
  683                                   tmp, 0, length);
  684                           fCurrentEntity.ch = tmp;
  685                           fCurrentEntity.fBufferSize *= 2;
  686                       } else { // otherwise move the partial name to the start of the same buffer
  687                           System.arraycopy(fCurrentEntity.ch, offset,
  688                                   fCurrentEntity.ch, 0, length);
  689                       }
  690                       offset = 0; // the name now begins at index 0
  691                       if (load(length, false)) {
  692                           break;
  693                       }
  694                   }
  695               }
  696           }
  697           int length = fCurrentEntity.position - offset; // 0 when the first character could not start a name
  698           fCurrentEntity.columnNumber += length;
  699   
  700           // return name: null when nothing was scanned
  701           String symbol;
  702           if (length > 0) {
  703               symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  704           } else
  705               symbol = null;
  706           if (DEBUG_BUFFER) {
  707               System.out.print(")scanName: ");
  708               print();
  709               System.out.println(" -> "+String.valueOf(symbol));
  710           }
  711           return symbol;
  712   
  713       } // scanName():String
  714   
  715       /**
  716        * Scans a qualified name from the input, setting the fields of the
  717        * QName structure appropriately.
  718        * <p>
  719        * <strong>Note:</strong> The qualified name characters are consumed.
  720        * <p>
  721        * <strong>Note:</strong> The strings used to set the values of the
  722        * QName structure must be symbols. The SymbolTable can be used for
  723        * this purpose.
  724        *
  725        * @param qname The qualified name structure to fill.
  726        *
  727        * @return Returns true if a qualified name appeared immediately on
  728        *         the input and was scanned, false otherwise.
  729        *
  730        * @throws IOException  Thrown if i/o error occurs.
  731        * @throws EOFException Thrown on end of file.
  732        *
  733        * @see com.sun.org.apache.xerces.internal.util.SymbolTable
  734        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
  735        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
  736        */
  737       public boolean scanQName(QName qname) throws IOException {
  738           if (DEBUG_BUFFER) {
  739               System.out.print("(scanQName, "+qname+": ");
  740               print();
  741               System.out.println();
  742           }
  743   
  744           // load more characters, if needed
  745           if (fCurrentEntity.position == fCurrentEntity.count) {
  746               invokeListeners(0);
  747               load(0, true);
  748           }
  749   
  750           // scan qualified name
  751           int offset = fCurrentEntity.position;
  752   
  753           //making a check if if the specified character is a valid name start character
  754           //as defined by production [5] in the XML 1.0 specification.
  755           // Name ::= (Letter | '_' | ':') (NameChar)*
  756   
  757           if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  758               if (++fCurrentEntity.position == fCurrentEntity.count) {
  759                   invokeListeners(1);
  760                   fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  761                   offset = 0;
  762   
  763                   if (load(1, false)) {
  764                       fCurrentEntity.columnNumber++;
  765                       //adding into symbol table.
  766                       //XXX We are trying to add single character in SymbolTable??????
  767                       String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  768                       qname.setValues(null, name, name, null);
  769                       if (DEBUG_BUFFER) {
  770                           System.out.print(")scanQName, "+qname+": ");
  771                           print();
  772                           System.out.println(" -> true");
  773                       }
  774                       return true;
  775                   }
  776               }
  777               int index = -1;
  778               boolean vc = false;
  779               while ( true){
  780   
  781                   //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
  782                   char c = fCurrentEntity.ch[fCurrentEntity.position];
  783                   if(c < 127){
  784                       vc = VALID_NAMES[c];
  785                   }else{
  786                       vc = XMLChar.isName(c);
  787                   }
  788                   if(!vc)break;
  789                   if (c == ':') {
  790                       if (index != -1) {
  791                           break;
  792                       }
  793                       index = fCurrentEntity.position;
  794                   }
  795                   if (++fCurrentEntity.position == fCurrentEntity.count) {
  796                       int length = fCurrentEntity.position - offset;
  797                       invokeListeners(length);
  798                       if (length == fCurrentEntity.fBufferSize) {
  799                           // bad luck we have to resize our buffer
  800                           char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
  801                           System.arraycopy(fCurrentEntity.ch, offset,
  802                                   tmp, 0, length);
  803                           fCurrentEntity.ch = tmp;
  804                           fCurrentEntity.fBufferSize *= 2;
  805                       } else {
  806                           System.arraycopy(fCurrentEntity.ch, offset,
  807                                   fCurrentEntity.ch, 0, length);
  808                       }
  809                       if (index != -1) {
  810                           index = index - offset;
  811                       }
  812                       offset = 0;
  813                       if (load(length, false)) {
  814                           break;
  815                       }
  816                   }
  817               }
  818               int length = fCurrentEntity.position - offset;
  819               fCurrentEntity.columnNumber += length;
  820               if (length > 0) {
  821                   String prefix = null;
  822                   String localpart = null;
  823                   String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
  824                           offset, length);
  825   
  826                   if (index != -1) {
  827                       int prefixLength = index - offset;
  828                       prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
  829                               offset, prefixLength);
  830                       int len = length - prefixLength - 1;
  831                       localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
  832                               index + 1, len);
  833   
  834                   } else {
  835                       localpart = rawname;
  836                   }
  837                   qname.setValues(prefix, localpart, rawname, null);
  838                   if (DEBUG_BUFFER) {
  839                       System.out.print(")scanQName, "+qname+": ");
  840                       print();
  841                       System.out.println(" -> true");
  842                   }
  843                   return true;
  844               }
  845           }
  846   
  847           // no qualified name found
  848           if (DEBUG_BUFFER) {
  849               System.out.print(")scanQName, "+qname+": ");
  850               print();
  851               System.out.println(" -> false");
  852           }
  853           return false;
  854   
  855       } // scanQName(QName):boolean
  856   
    /**
     * Scans a range of parsed character data and points the supplied
     * XMLString at it.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> The result is stored with
     * <code>content.setValues(...)</code>, i.e. the XMLString refers to a
     * region of the scanner's internal buffer and any previous value is
     * replaced, not appended to.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * <strong>Note:</strong> Line breaks at the start of the scanned range
     * are rewritten to '\n' in the buffer; '\r' is only treated as a line
     * break when <code>isExternal</code> is set.
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // only one unread character is left: move it to the front of
            // the buffer and load more input behind it
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        // offset marks the start of the range handed back to the caller
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // counter maintained by the loop below; it is subtracted from the
        // column advance further down
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: restart the range at index 0 and
                        // resume at index 'newlines'; the slots before it are
                        // overwritten with '\n' after the loop
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n": consume the '\n' too and advance offset so
                        // the pair contributes a single character to the range
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    // NOTE(review): a '\r' that is not followed by '\n' bumps
                    // 'newlines' a second time -- confirm this is intended.
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same buffer-exhausted handling as in the '\r' branch
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                } else {
                    // not a line break: un-read it and stop normalizing
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // everything consumed so far in [offset, position) was a line
            // break, so rewrite that region to '\n'
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // Only one unread character remains: hand back just the
                // newlines. The value is set (replaced), not appended; the
                // append variant is not used.
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until one is rejected by XMLChar.isContent or
        // the buffer ends; the rejected character is left unread
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        // the column was already reset to 1 while counting line breaks, so
        // the 'newlines' count is excluded from the advance
        fCurrentEntity.columnNumber += length - newlines;

        // The value is set (replaced), not appended: content refers to the
        // region [offset, offset + length) of the internal buffer.
        content.setValues(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            // a pending '\r' in an external entity is reported as '\n'
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            // buffer exhausted: the next character is not known yet
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int
 1002   
 1003       /**
 1004        * Scans a range of attribute value data, setting the fields of the
 1005        * XMLString structure, appropriately.
 1006        * <p>
 1007        * <strong>Note:</strong> The characters are consumed.
 1008        * <p>
 1009        * <strong>Note:</strong> This method does not guarantee to return
 1010        * the longest run of attribute value data. This method may return
 1011        * before the quote character due to reaching the end of the input
 1012        * buffer or any other reason.
 1013        * <p>
 1014        * <strong>Note:</strong> The fields contained in the XMLString
 1015        * structure are not guaranteed to remain valid upon subsequent calls
 1016        * to the entity scanner. Therefore, the caller is responsible for
 1017        * immediately using the returned character data or making a copy of
 1018        * the character data.
 1019        *
 1020        * @param quote   The quote character that signifies the end of the
 1021        *                attribute value data.
 1022        * @param content The content structure to fill.
 1023        *
 1024        * @return Returns the next character on the input, if known. This
 1025        *         value may be -1 but this does <em>note</em> designate
 1026        *         end of file.
 1027        *
 1028        * @throws IOException  Thrown if i/o error occurs.
 1029        * @throws EOFException Thrown on end of file.
 1030        */
 1031       public int scanLiteral(int quote, XMLString content)
 1032       throws IOException {
 1033           if (DEBUG_BUFFER) {
 1034               System.out.print("(scanLiteral, '"+(char)quote+"': ");
 1035               print();
 1036               System.out.println();
 1037           }
 1038           // load more characters, if needed
 1039           if (fCurrentEntity.position == fCurrentEntity.count) {
 1040               invokeListeners(0);
 1041               load(0, true);
 1042           } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1043               invokeListeners(0);
 1044               fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
 1045   
 1046               load(1, false);
 1047               fCurrentEntity.position = 0;
 1048           }
 1049   
 1050           // normalize newlines
 1051           int offset = fCurrentEntity.position;
 1052           int c = fCurrentEntity.ch[offset];
 1053           int newlines = 0;
 1054           if(whiteSpaceInfoNeeded)
 1055               whiteSpaceLen=0;
 1056           if (c == '\n' || (c == '\r' && isExternal)) {
 1057               if (DEBUG_BUFFER) {
 1058                   System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
 1059                   print();
 1060                   System.out.println();
 1061               }
 1062               do {
 1063                   c = fCurrentEntity.ch[fCurrentEntity.position++];
 1064                   if (c == '\r' && isExternal) {
 1065                       newlines++;
 1066                       fCurrentEntity.lineNumber++;
 1067                       fCurrentEntity.columnNumber = 1;
 1068                       if (fCurrentEntity.position == fCurrentEntity.count) {
 1069                           invokeListeners(newlines);
 1070                           offset = 0;
 1071                           fCurrentEntity.position = newlines;
 1072                           if (load(newlines, false)) {
 1073                               break;
 1074                           }
 1075                       }
 1076                       if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
 1077                           fCurrentEntity.position++;
 1078                           offset++;
 1079                       }
 1080                       /*** NEWLINE NORMALIZATION ***/
 1081                       else {
 1082                           newlines++;
 1083                       }
 1084                       /***/
 1085                   } else if (c == '\n') {
 1086                       newlines++;
 1087                       fCurrentEntity.lineNumber++;
 1088                       fCurrentEntity.columnNumber = 1;
 1089                       if (fCurrentEntity.position == fCurrentEntity.count) {
 1090                           offset = 0;
 1091                           invokeListeners(newlines);
 1092                           fCurrentEntity.position = newlines;
 1093                           if (load(newlines, false)) {
 1094                               break;
 1095                           }
 1096                       }
 1097                       /*** NEWLINE NORMALIZATION ***
 1098                        * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
 1099                        * && external) {
 1100                        * fCurrentEntity.position++;
 1101                        * offset++;
 1102                        * }
 1103                        * /***/
 1104                   } else {
 1105                       fCurrentEntity.position--;
 1106                       break;
 1107                   }
 1108               } while (fCurrentEntity.position < fCurrentEntity.count - 1);
 1109               int i=0;
 1110               for ( i = offset; i < fCurrentEntity.position; i++) {
 1111                   fCurrentEntity.ch[i] = '\n';
 1112                   whiteSpaceLookup[whiteSpaceLen++]=i;
 1113               }
 1114   
 1115               int length = fCurrentEntity.position - offset;
 1116               if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1117                   content.setValues(fCurrentEntity.ch, offset, length);
 1118                   if (DEBUG_BUFFER) {
 1119                       System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 1120                       print();
 1121                       System.out.println();
 1122                   }
 1123                   return -1;
 1124               }
 1125               if (DEBUG_BUFFER) {
 1126                   System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 1127                   print();
 1128                   System.out.println();
 1129               }
 1130           }
 1131   
 1132           // scan literal value
 1133           while (fCurrentEntity.position < fCurrentEntity.count) {
 1134               c = fCurrentEntity.ch[fCurrentEntity.position++];
 1135               if ((c == quote &&
 1136                    (!fCurrentEntity.literal || isExternal))
 1137                   || c == '%' || !XMLChar.isContent(c)) {
 1138                   fCurrentEntity.position--;
 1139                   break;
 1140               }
 1141               if(whiteSpaceInfoNeeded){
 1142                   if(c == 0x20 || c == 0x9){
 1143                       if(whiteSpaceLen < whiteSpaceLookup.length){
 1144                           whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position-1;
 1145                       }else{
 1146                           int [] tmp = new int[whiteSpaceLookup.length*2];
 1147                           System.arraycopy(whiteSpaceLookup,0,tmp,0,whiteSpaceLookup.length);
 1148                           whiteSpaceLookup = tmp;
 1149                           whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position - 1;
 1150                       }
 1151                   }
 1152               }
 1153           }
 1154           int length = fCurrentEntity.position - offset;
 1155           fCurrentEntity.columnNumber += length - newlines;
 1156           content.setValues(fCurrentEntity.ch, offset, length);
 1157   
 1158           // return next character
 1159           if (fCurrentEntity.position != fCurrentEntity.count) {
 1160               c = fCurrentEntity.ch[fCurrentEntity.position];
 1161               // NOTE: We don't want to accidentally signal the
 1162               //       end of the literal if we're expanding an
 1163               //       entity appearing in the literal. -Ac
 1164               if (c == quote && fCurrentEntity.literal) {
 1165                   c = -1;
 1166               }
 1167           } else {
 1168               c = -1;
 1169           }
 1170           if (DEBUG_BUFFER) {
 1171               System.out.print(")scanLiteral, '"+(char)quote+"': ");
 1172               print();
 1173               System.out.println(" -> '"+(char)c+"'");
 1174           }
 1175           return c;
 1176   
 1177       } // scanLiteral(int,XMLString):int
 1178   
    /**
     * Scans character data up to the specified delimiter and appends it
     * to the supplied buffer. The delimiter itself is consumed but is not
     * appended.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character (its first character is read unconditionally).
     * Presumably the internal buffer must also be at least as large as
     * the delimiter -- TODO confirm against load().
     * <p>
     * <strong>Note:</strong> Unlike the other scan methods, this one keeps
     * looping until the delimiter is found; it returns early only after a
     * leading run of line breaks that leaves a single unread character,
     * at an invalid character, or when the input ends before the
     * delimiter is seen.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The buffer to fill. Data is appended to its current
     *                  contents.
     *
     * @return Returns true if there is more data to scan, false otherwise
     *         (the delimiter was found, or the input ended without it).
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true);
            }

            boolean bNextEntity = false;

            // Fewer than delimLen unread characters are buffered: move the
            // unread tail to the front and load more behind it, until
            // enough are available or load() returns true.
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
              System.arraycopy(fCurrentEntity.ch,
                               fCurrentEntity.position,
                               fCurrentEntity.ch,
                               0,
                               fCurrentEntity.count - fCurrentEntity.position);

              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
              fCurrentEntity.position = 0;
              fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // hand back whatever is left, mark the buffer as fully
                // consumed and report that there is nothing more to scan
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true);
                return false;
            }

            // normalize newlines
            // offset marks the start of the range appended in this pass
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted: restart the range at index 0
                            // and resume at index 'newlines'
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n": consume the '\n' too and advance offset
                            // so the pair contributes a single character
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is also reset here, which
                            // the '\r' branch above does not do -- confirm
                            // the asymmetry is intended.
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                    } else {
                        // not a line break: un-read it and stop normalizing
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // rewrite the consumed line breaks in [offset, position) to '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one unread character remains: append the newlines
                    // and let the caller call again
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer ended inside a possible delimiter: rewind
                            // to its first character and leave the scan loop
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: resume just after the first character
                            // of the failed candidate
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    // position only reaches this value when every delimiter
                    // character matched
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // leave the line break unread; the next pass of the outer
                    // do-loop normalizes it
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // leave the invalid character unread, append what was
                    // scanned so far and return to the caller
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // the delimiter was consumed but must not be appended
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // trace whether the delimiter has been found in this pass
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // the loop above only exits once done is true, so this is false
        return !done;

    } // scanData(String,XMLStringBuffer):boolean
 1369   
 1370       /**
 1371        * Skips a character appearing immediately on the input.
 1372        * <p>
 1373        * <strong>Note:</strong> The character is consumed only if it matches
 1374        * the specified character.
 1375        *
 1376        * @param c The character to skip.
 1377        *
 1378        * @return Returns true if the character was skipped.
 1379        *
 1380        * @throws IOException  Thrown if i/o error occurs.
 1381        * @throws EOFException Thrown on end of file.
 1382        */
 1383       public boolean skipChar(int c) throws IOException {
 1384           if (DEBUG_BUFFER) {
 1385               System.out.print("(skipChar, '"+(char)c+"': ");
 1386               print();
 1387               System.out.println();
 1388           }
 1389   
 1390           // load more characters, if needed
 1391           if (fCurrentEntity.position == fCurrentEntity.count) {
 1392               invokeListeners(0);
 1393               load(0, true);
 1394           }
 1395   
 1396           // skip character
 1397           int cc = fCurrentEntity.ch[fCurrentEntity.position];
 1398           if (cc == c) {
 1399               fCurrentEntity.position++;
 1400               if (c == '\n') {
 1401                   fCurrentEntity.lineNumber++;
 1402                   fCurrentEntity.columnNumber = 1;
 1403               } else {
 1404                   fCurrentEntity.columnNumber++;
 1405               }
 1406               if (DEBUG_BUFFER) {
 1407                   System.out.print(")skipChar, '"+(char)c+"': ");
 1408                   print();
 1409                   System.out.println(" -> true");
 1410               }
 1411               return true;
 1412           } else if (c == '\n' && cc == '\r' && isExternal) {
 1413               // handle newlines
 1414               if (fCurrentEntity.position == fCurrentEntity.count) {
 1415                   invokeListeners(1);
 1416                   fCurrentEntity.ch[0] = (char)cc;
 1417                   load(1, false);
 1418               }
 1419               fCurrentEntity.position++;
 1420               if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
 1421                   fCurrentEntity.position++;
 1422               }
 1423               fCurrentEntity.lineNumber++;
 1424               fCurrentEntity.columnNumber = 1;
 1425               if (DEBUG_BUFFER) {
 1426                   System.out.print(")skipChar, '"+(char)c+"': ");
 1427                   print();
 1428                   System.out.println(" -> true");
 1429               }
 1430               return true;
 1431           }
 1432   
 1433           // character was not skipped
 1434           if (DEBUG_BUFFER) {
 1435               System.out.print(")skipChar, '"+(char)c+"': ");
 1436               print();
 1437               System.out.println(" -> false");
 1438           }
 1439           return false;
 1440   
 1441       } // skipChar(int):boolean
 1442   
 1443       public boolean isSpace(char ch){
 1444           return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
 1445       }
 1446       /**
 1447        * Skips space characters appearing immediately on the input.
 1448        * <p>
 1449        * <strong>Note:</strong> The characters are consumed only if they are
 1450        * space characters.
 1451        *
 1452        * @return Returns true if at least one space character was skipped.
 1453        *
 1454        * @throws IOException  Thrown if i/o error occurs.
 1455        * @throws EOFException Thrown on end of file.
 1456        *
 1457        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
 1458        */
 1459       public boolean skipSpaces() throws IOException {
 1460           if (DEBUG_BUFFER) {
 1461               System.out.print("(skipSpaces: ");
 1462               print();
 1463               System.out.println();
 1464           }
 1465           //boolean entityChanged = false;
 1466           // load more characters, if needed
 1467           if (fCurrentEntity.position == fCurrentEntity.count) {
 1468               invokeListeners(0);
 1469               load(0, true);
 1470           }
 1471   
 1472           //we are doing this check only in skipSpace() because it is called by
 1473           //fMiscDispatcher and we want the parser to exit gracefully when document
 1474           //is well-formed.
 1475           //it is possible that end of document is reached and
 1476           //fCurrentEntity becomes null
 1477           //nothing was read so entity changed  'false' should be returned.
 1478           if(fCurrentEntity == null){
 1479               return false ;
 1480           }
 1481   
 1482           // skip spaces
 1483           int c = fCurrentEntity.ch[fCurrentEntity.position];
 1484           if (XMLChar.isSpace(c)) {
 1485               do {
 1486                   boolean entityChanged = false;
 1487                   // handle newlines
 1488                   if (c == '\n' || (isExternal && c == '\r')) {
 1489                       fCurrentEntity.lineNumber++;
 1490                       fCurrentEntity.columnNumber = 1;
 1491                       if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1492                           invokeListeners(0);
 1493                           fCurrentEntity.ch[0] = (char)c;
 1494                           entityChanged = load(1, true);
 1495                           if (!entityChanged){
 1496                               // the load change the position to be 1,
 1497                               // need to restore it when entity not changed
 1498                               fCurrentEntity.position = 0;
 1499                           }else if(fCurrentEntity == null){
 1500                               return true ;
 1501                           }
 1502                       }
 1503                       if (c == '\r' && isExternal) {
 1504                           // REVISIT: Does this need to be updated to fix the
 1505                           //          #x0D ^#x0A newline normalization problem? -Ac
 1506                           if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
 1507                               fCurrentEntity.position--;
 1508                           }
 1509                       }
 1510                   } else {
 1511                       fCurrentEntity.columnNumber++;
 1512                   }
 1513                   // load more characters, if needed
 1514                   if (!entityChanged){
 1515                       fCurrentEntity.position++;
 1516                   }
 1517   
 1518                   if (fCurrentEntity.position == fCurrentEntity.count) {
 1519                       invokeListeners(0);
 1520                       load(0, true);
 1521   
 1522                       //we are doing this check only in skipSpace() because it is called by
 1523                       //fMiscDispatcher and we want the parser to exit gracefully when document
 1524                       //is well-formed.
 1525   
 1526                       //it is possible that end of document is reached and
 1527                       //fCurrentEntity becomes null
 1528                       //nothing was read so entity changed  'false' should be returned.
 1529                       if(fCurrentEntity == null){
 1530                           return true ;
 1531                       }
 1532   
 1533                   }
 1534               } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
 1535               if (DEBUG_BUFFER) {
 1536                   System.out.print(")skipSpaces: ");
 1537                   print();
 1538                   System.out.println(" -> true");
 1539               }
 1540               return true;
 1541           }
 1542   
 1543           // no spaces were found
 1544           if (DEBUG_BUFFER) {
 1545               System.out.print(")skipSpaces: ");
 1546               print();
 1547               System.out.println(" -> false");
 1548           }
 1549           return false;
 1550   
 1551       } // skipSpaces():boolean
 1552   
 1553   
 1554       /**
 1555        * @param legnth This function checks that following number of characters are available.
 1556        * to the underlying buffer.
 1557        * @return This function returns true if capacity asked is available.
 1558        */
 1559       public boolean arrangeCapacity(int length) throws IOException{
 1560           return arrangeCapacity(length, false);
 1561       }
 1562   
 1563       /**
 1564        * @param legnth This function checks that following number of characters are available.
 1565        * to the underlying buffer.
 1566        * @param if the underlying function should change the entity
 1567        * @return This function returns true if capacity asked is available.
 1568        *
 1569        */
 1570       public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
 1571           //check if the capacity is availble in the current buffer
 1572           //count is no. of characters in the buffer   [x][m][l]
 1573           //position is '0' based
 1574           //System.out.println("fCurrent Entity " + fCurrentEntity);
 1575           if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
 1576               return true;
 1577           }
 1578           if(DEBUG_SKIP_STRING){
 1579               System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
 1580               System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
 1581               System.out.println("length = " + length);
 1582           }
 1583           boolean entityChanged = false;
 1584           //load more characters -- this function shouldn't change the entity
 1585           while((fCurrentEntity.count - fCurrentEntity.position) < length){
 1586               if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
 1587                   invokeListeners(0);
 1588                   System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
 1589                   fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
 1590                   fCurrentEntity.position = 0;
 1591               }
 1592   
 1593               if((fCurrentEntity.count - fCurrentEntity.position) < length){
 1594                   int pos = fCurrentEntity.position;
 1595                   invokeListeners(pos);
 1596                   entityChanged = load(fCurrentEntity.count, changeEntity);
 1597                   fCurrentEntity.position = pos;
 1598                   if(entityChanged)break;
 1599               }
 1600               if(DEBUG_SKIP_STRING){
 1601                   System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
 1602                   System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
 1603                   System.out.println("length = " + length);
 1604               }
 1605           }
 1606           //load changes the position.. set it back to the point where we started.
 1607   
 1608           //after loading check again.
 1609           if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
 1610               return true;
 1611           } else {
 1612               return false;
 1613           }
 1614       }
 1615   
 1616       /**
 1617        * Skips the specified string appearing immediately on the input.
 1618        * <p>
 1619        * <strong>Note:</strong> The characters are consumed only if all
 1620        * the characters are skipped.
 1621        *
 1622        * @param s The string to skip.
 1623        *
 1624        * @return Returns true if the string was skipped.
 1625        *
 1626        * @throws IOException  Thrown if i/o error occurs.
 1627        * @throws EOFException Thrown on end of file.
 1628        */
 1629       public boolean skipString(String s) throws IOException {
 1630   
 1631           final int length = s.length();
 1632   
 1633           //first make sure that required capacity is avaible
 1634           if(arrangeCapacity(length, false)){
 1635               final int beforeSkip = fCurrentEntity.position ;
 1636               int afterSkip = fCurrentEntity.position + length - 1 ;
 1637               if(DEBUG_SKIP_STRING){
 1638                   System.out.println("skipString,length = " + s + "," + length);
 1639                   System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
 1640               }
 1641   
 1642               //s.charAt() indexes are 0 to 'Length -1' based.
 1643               int i = length - 1 ;
 1644               //check from reverse
 1645               while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
 1646                   if(afterSkip-- == beforeSkip){
 1647                       fCurrentEntity.position = fCurrentEntity.position + length ;
 1648                       fCurrentEntity.columnNumber += length;
 1649                       return true;
 1650                   }
 1651               }
 1652           }
 1653   
 1654           return false;
 1655       } // skipString(String):boolean
 1656   
 1657       public boolean skipString(char [] s) throws IOException {
 1658   
 1659           final int length = s.length;
 1660           //first make sure that required capacity is avaible
 1661           if(arrangeCapacity(length, false)){
 1662               int beforeSkip = fCurrentEntity.position ;
 1663               int afterSkip = fCurrentEntity.position + length  ;
 1664   
 1665               if(DEBUG_SKIP_STRING){
 1666                   System.out.println("skipString,length = " + new String(s) + "," + length);
 1667                   System.out.println("skipString,length = " + new String(s) + "," + length);
 1668               }
 1669   
 1670               for(int i=0;i<length;i++){
 1671                   if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
 1672                      return false;
 1673                   }
 1674               }
 1675               fCurrentEntity.position = fCurrentEntity.position + length ;
 1676               fCurrentEntity.columnNumber += length;
 1677               return true;
 1678   
 1679           }
 1680   
 1681           return false;
 1682       }
 1683   
 1684       //
 1685       // Locator methods
 1686       //
 1687       //
 1688       // Private methods
 1689       //
 1690   
 1691       /**
 1692        * Loads a chunk of text.
 1693        *
 1694        * @param offset       The offset into the character buffer to
 1695        *                     read the next batch of characters.
 1696        * @param changeEntity True if the load should change entities
 1697        *                     at the end of the entity, otherwise leave
 1698        *                     the current entity in place and the entity
 1699        *                     boundary will be signaled by the return
 1700        *                     value.
 1701        *
 1702        * @returns Returns true if the entity changed as a result of this
 1703        *          load operation.
 1704        */
 1705       final boolean load(int offset, boolean changeEntity)
 1706       throws IOException {
 1707           if (DEBUG_BUFFER) {
 1708               System.out.print("(load, "+offset+": ");
 1709               print();
 1710               System.out.println();
 1711           }
 1712           //maintaing the count till last load
 1713           fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
 1714           // read characters
 1715           int length = fCurrentEntity.mayReadChunks ?
 1716               (fCurrentEntity.ch.length - offset): (fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE);
 1717   
 1718           if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
 1719           int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
 1720           if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
 1721   
 1722           // reset count and position
 1723           boolean entityChanged = false;
 1724           if (count != -1) {
 1725               if (count != 0) {
 1726                   // record the last count
 1727                   fCurrentEntity.fLastCount = count;
 1728                   fCurrentEntity.count = count + offset;
 1729                   fCurrentEntity.position = offset;
 1730               }
 1731           }
 1732           // end of this entity
 1733           else {
 1734               fCurrentEntity.count = offset;
 1735               fCurrentEntity.position = offset;
 1736               entityChanged = true;
 1737   
 1738               if (changeEntity) {
 1739                   //notify the entity manager about the end of entity
 1740                   fEntityManager.endEntity();
 1741                   //return if the current entity becomes null
 1742                   if(fCurrentEntity == null){
 1743                       return true ;
 1744                   }
 1745                   // handle the trailing edges
 1746                   if (fCurrentEntity.position == fCurrentEntity.count) {
 1747                       load(0, true);
 1748                   }
 1749               }
 1750   
 1751           }
 1752           if (DEBUG_BUFFER) {
 1753               System.out.print(")load, "+offset+": ");
 1754               print();
 1755               System.out.println();
 1756           }
 1757   
 1758           return entityChanged;
 1759   
 1760       } // load(int, boolean):boolean
 1761   
 1762       /**
 1763        * Creates a reader capable of reading the given input stream in
 1764        * the specified encoding.
 1765        *
 1766        * @param inputStream  The input stream.
 1767        * @param encoding     The encoding name that the input stream is
 1768        *                     encoded using. If the user has specified that
 1769        *                     Java encoding names are allowed, then the
 1770        *                     encoding name may be a Java encoding name;
 1771        *                     otherwise, it is an ianaEncoding name.
 1772        * @param isBigEndian   For encodings (like uCS-4), whose names cannot
 1773        *                      specify a byte order, this tells whether the order is bigEndian.  null menas
 1774        *                      unknown or not relevant.
 1775        *
 1776        * @return Returns a reader.
 1777        */
 1778       protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
 1779       throws IOException {
 1780   
 1781           // normalize encoding name
 1782           if (encoding == null) {
 1783               encoding = "UTF-8";
 1784           }
 1785   
 1786           // try to use an optimized reader
 1787           String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
 1788           if (ENCODING.equals("UTF-8")) {
 1789               if (DEBUG_ENCODINGS) {
 1790                   System.out.println("$$$ creating UTF8Reader");
 1791               }
 1792               return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
 1793           }
 1794           if (ENCODING.equals("US-ASCII")) {
 1795               if (DEBUG_ENCODINGS) {
 1796                   System.out.println("$$$ creating ASCIIReader");
 1797               }
 1798               return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
 1799           }
 1800           if(ENCODING.equals("ISO-10646-UCS-4")) {
 1801               if(isBigEndian != null) {
 1802                   boolean isBE = isBigEndian.booleanValue();
 1803                   if(isBE) {
 1804                       return new UCSReader(inputStream, UCSReader.UCS4BE);
 1805                   } else {
 1806                       return new UCSReader(inputStream, UCSReader.UCS4LE);
 1807                   }
 1808               } else {
 1809                   fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 1810                           "EncodingByteOrderUnsupported",
 1811                           new Object[] { encoding },
 1812                           XMLErrorReporter.SEVERITY_FATAL_ERROR);
 1813               }
 1814           }
 1815           if(ENCODING.equals("ISO-10646-UCS-2")) {
 1816               if(isBigEndian != null) { // sould never happen with this encoding...
 1817                   boolean isBE = isBigEndian.booleanValue();
 1818                   if(isBE) {
 1819                       return new UCSReader(inputStream, UCSReader.UCS2BE);
 1820                   } else {
 1821                       return new UCSReader(inputStream, UCSReader.UCS2LE);
 1822                   }
 1823               } else {
 1824                   fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 1825                           "EncodingByteOrderUnsupported",
 1826                           new Object[] { encoding },
 1827                           XMLErrorReporter.SEVERITY_FATAL_ERROR);
 1828               }
 1829           }
 1830   
 1831           // check for valid name
 1832           boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
 1833           boolean validJava = XMLChar.isValidJavaEncoding(encoding);
 1834           if (!validIANA || (fAllowJavaEncodings && !validJava)) {
 1835               fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 1836                       "EncodingDeclInvalid",
 1837                       new Object[] { encoding },
 1838                       XMLErrorReporter.SEVERITY_FATAL_ERROR);
 1839                       // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
 1840                       //       because every byte is a valid ISO Latin 1 character.
 1841                       //       It may not translate correctly but if we failed on
 1842                       //       the encoding anyway, then we're expecting the content
 1843                       //       of the document to be bad. This will just prevent an
 1844                       //       invalid UTF-8 sequence to be detected. This is only
 1845                       //       important when continue-after-fatal-error is turned
 1846                       //       on. -Ac
 1847                       encoding = "ISO-8859-1";
 1848           }
 1849   
 1850           // try to use a Java reader
 1851           String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
 1852           if (javaEncoding == null) {
 1853               if(fAllowJavaEncodings) {
 1854                   javaEncoding = encoding;
 1855               } else {
 1856                   fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 1857                           "EncodingDeclInvalid",
 1858                           new Object[] { encoding },
 1859                           XMLErrorReporter.SEVERITY_FATAL_ERROR);
 1860                           // see comment above.
 1861                           javaEncoding = "ISO8859_1";
 1862               }
 1863           }
 1864           else if (javaEncoding.equals("ASCII")) {
 1865               if (DEBUG_ENCODINGS) {
 1866                   System.out.println("$$$ creating ASCIIReader");
 1867               }
 1868               return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
 1869           }
 1870   
 1871           if (DEBUG_ENCODINGS) {
 1872               System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
 1873               if (javaEncoding == encoding) {
 1874                   System.out.print(" (IANA encoding)");
 1875               }
 1876               System.out.println();
 1877           }
 1878           return new InputStreamReader(inputStream, javaEncoding);
 1879   
 1880       } // createReader(InputStream,String, Boolean): Reader
 1881   
 1882       /**
 1883        * Returns the IANA encoding name that is auto-detected from
 1884        * the bytes specified, with the endian-ness of that encoding where appropriate.
 1885        *
 1886        * @param b4    The first four bytes of the input.
 1887        * @param count The number of bytes actually read.
 1888        * @return a 2-element array:  the first element, an IANA-encoding string,
 1889        *  the second element a Boolean which is true iff the document is big endian, false
 1890        *  if it's little-endian, and null if the distinction isn't relevant.
 1891        */
 1892       protected Object[] getEncodingName(byte[] b4, int count) {
 1893   
 1894           if (count < 2) {
 1895               return new Object[]{"UTF-8", null};
 1896           }
 1897   
 1898           // UTF-16, with BOM
 1899           int b0 = b4[0] & 0xFF;
 1900           int b1 = b4[1] & 0xFF;
 1901           if (b0 == 0xFE && b1 == 0xFF) {
 1902               // UTF-16, big-endian
 1903               return new Object [] {"UTF-16BE", new Boolean(true)};
 1904           }
 1905           if (b0 == 0xFF && b1 == 0xFE) {
 1906               // UTF-16, little-endian
 1907               return new Object [] {"UTF-16LE", new Boolean(false)};
 1908           }
 1909   
 1910           // default to UTF-8 if we don't have enough bytes to make a
 1911           // good determination of the encoding
 1912           if (count < 3) {
 1913               return new Object [] {"UTF-8", null};
 1914           }
 1915   
 1916           // UTF-8 with a BOM
 1917           int b2 = b4[2] & 0xFF;
 1918           if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
 1919               return new Object [] {"UTF-8", null};
 1920           }
 1921   
 1922           // default to UTF-8 if we don't have enough bytes to make a
 1923           // good determination of the encoding
 1924           if (count < 4) {
 1925               return new Object [] {"UTF-8", null};
 1926           }
 1927   
 1928           // other encodings
 1929           int b3 = b4[3] & 0xFF;
 1930           if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
 1931               // UCS-4, big endian (1234)
 1932               return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
 1933           }
 1934           if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
 1935               // UCS-4, little endian (4321)
 1936               return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
 1937           }
 1938           if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
 1939               // UCS-4, unusual octet order (2143)
 1940               // REVISIT: What should this be?
 1941               return new Object [] {"ISO-10646-UCS-4", null};
 1942           }
 1943           if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
 1944               // UCS-4, unusual octect order (3412)
 1945               // REVISIT: What should this be?
 1946               return new Object [] {"ISO-10646-UCS-4", null};
 1947           }
 1948           if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
 1949               // UTF-16, big-endian, no BOM
 1950               // (or could turn out to be UCS-2...
 1951               // REVISIT: What should this be?
 1952               return new Object [] {"UTF-16BE", new Boolean(true)};
 1953           }
 1954           if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
 1955               // UTF-16, little-endian, no BOM
 1956               // (or could turn out to be UCS-2...
 1957               return new Object [] {"UTF-16LE", new Boolean(false)};
 1958           }
 1959           if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
 1960               // EBCDIC
 1961               // a la xerces1, return CP037 instead of EBCDIC here
 1962               return new Object [] {"CP037", null};
 1963           }
 1964   
 1965           // default encoding
 1966           return new Object [] {"UTF-8", null};
 1967   
 1968       } // getEncodingName(byte[],int):Object[]
 1969   
 1970       /**
 1971        * xxx not removing endEntity() so that i remember that we need to implement it.
 1972        * Ends an entity.
 1973        *
 1974        * @throws XNIException Thrown by entity handler to signal an error.
 1975        */
 1976       //
 1977       /** Prints the contents of the buffer. */
 1978       final void print() {
 1979           if (DEBUG_BUFFER) {
 1980               if (fCurrentEntity != null) {
 1981                   System.out.print('[');
 1982                   System.out.print(fCurrentEntity.count);
 1983                   System.out.print(' ');
 1984                   System.out.print(fCurrentEntity.position);
 1985                   if (fCurrentEntity.count > 0) {
 1986                       System.out.print(" \"");
 1987                       for (int i = 0; i < fCurrentEntity.count; i++) {
 1988                           if (i == fCurrentEntity.position) {
 1989                               System.out.print('^');
 1990                           }
 1991                           char c = fCurrentEntity.ch[i];
 1992                           switch (c) {
 1993                               case '\n': {
 1994                                   System.out.print("\\n");
 1995                                   break;
 1996                               }
 1997                               case '\r': {
 1998                                   System.out.print("\\r");
 1999                                   break;
 2000                               }
 2001                               case '\t': {
 2002                                   System.out.print("\\t");
 2003                                   break;
 2004                               }
 2005                               case '\\': {
 2006                                   System.out.print("\\\\");
 2007                                   break;
 2008                               }
 2009                               default: {
 2010                                   System.out.print(c);
 2011                               }
 2012                           }
 2013                       }
 2014                       if (fCurrentEntity.position == fCurrentEntity.count) {
 2015                           System.out.print('^');
 2016                       }
 2017                       System.out.print('"');
 2018                   }
 2019                   System.out.print(']');
 2020                   System.out.print(" @ ");
 2021                   System.out.print(fCurrentEntity.lineNumber);
 2022                   System.out.print(',');
 2023                   System.out.print(fCurrentEntity.columnNumber);
 2024               } else {
 2025                   System.out.print("*NO CURRENT ENTITY*");
 2026               }
 2027           }
 2028       }
 2029   
 2030       /**
 2031        * Registers the listener object and provides callback.
 2032        * @param listener listener to which call back should be provided when scanner buffer
 2033        * is being changed.
 2034        */
 2035       public void registerListener(XMLBufferListener listener) {
 2036           if(!listeners.contains(listener))
 2037               listeners.add(listener);
 2038       }
 2039   
 2040       /**
 2041        *
 2042        * @param loadPos Starting position from which new data is being loaded into scanner buffer.
 2043        */
 2044       private void invokeListeners(int loadPos){
 2045           for(int i=0;i<listeners.size();i++){
 2046               XMLBufferListener listener =(XMLBufferListener) listeners.get(i);
 2047               listener.refresh(loadPos);
 2048           }
 2049       }
 2050   
 2051       /**
 2052        * Skips space characters appearing immediately on the input that would
 2053        * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
 2054        * normalization is performed. This is useful when scanning structures
 2055        * such as the XMLDecl and TextDecl that can only contain US-ASCII
 2056        * characters.
 2057        * <p>
 2058        * <strong>Note:</strong> The characters are consumed only if they would
 2059        * match non-terminal S before end of line normalization is performed.
 2060        * The current entity's line and column numbers are updated as characters are skipped.
 2061        * @return Returns true if at least one space character was skipped.
 2062        *
 2063        * @throws IOException  Thrown if an I/O error occurs.
 2064        * @throws EOFException Thrown on end of file.
 2065        *
 2066        * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
 2067        */
 2068       public boolean skipDeclSpaces() throws IOException {
 2069           if (DEBUG_BUFFER) {
 2070               System.out.print("(skipDeclSpaces: ");
 2071               //XMLEntityManager.print(fCurrentEntity);
 2072               System.out.println();
 2073           }
 2074   
 2075           // every buffered character has been consumed: load more before reading
 2076           if (fCurrentEntity.position == fCurrentEntity.count) {
 2077               load(0, true);
 2078           }
 2079   
 2080           // skip spaces; c is the character at the current position
 2081           int c = fCurrentEntity.ch[fCurrentEntity.position];
 2082           if (XMLChar.isSpace(c)) {
 2083               boolean external = fCurrentEntity.isExternal(); // read once, before any load() call in the loop
 2084               do {
 2085                   boolean entityChanged = false;
 2086                   // handle newlines: '\n' always counts, '\r' only when the entity is external
 2087                   if (c == '\n' || (external && c == '\r')) {
 2088                       fCurrentEntity.lineNumber++;
 2089                       fCurrentEntity.columnNumber = 1;
 2090                       if (fCurrentEntity.position == fCurrentEntity.count - 1) { // newline is the last buffered character
 2091                           fCurrentEntity.ch[0] = (char)c; // presumably load(1, ...) refills from index 1, keeping this char - TODO confirm
 2092                           entityChanged = load(1, true);
 2093                           if (!entityChanged)
 2094                               // the load changed the position to be 1;
 2095                               // it needs to be restored when the entity has not changed
 2096                               fCurrentEntity.position = 0;
 2097                       }
 2098                       if (c == '\r' && external) {
 2099                           // REVISIT: Does this need to be updated to fix the
 2100                           //          #x0D ^#x0A newline normalization problem? -Ac
 2101                           if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { // look ahead: a following '\n' is consumed with the '\r'
 2102                               fCurrentEntity.position--; // lone '\r': undo the look-ahead
 2103                           }
 2104                       }
 2105                       /*** NEWLINE NORMALIZATION ***
 2106                        * else {
 2107                        * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
 2108                        * && external) {
 2109                        * fCurrentEntity.position++;
 2110                        * }
 2111                        * }
 2112                        * /***/
 2113                   } else {
 2114                       fCurrentEntity.columnNumber++;
 2115                   }
 2116                   // step past the skipped character (unless the entity changed), then load more characters, if needed
 2117                   if (!entityChanged)
 2118                       fCurrentEntity.position++;
 2119                   if (fCurrentEntity.position == fCurrentEntity.count) {
 2120                       load(0, true);
 2121                   }
 2122               } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); // repeat while the next character is a space
 2123               if (DEBUG_BUFFER) {
 2124                   System.out.print(")skipDeclSpaces: ");
 2125                   //  XMLEntityManager.print(fCurrentEntity);
 2126                   System.out.println(" -> true");
 2127               }
 2128               return true;
 2129           }
 2130   
 2131           // no spaces were found: nothing was consumed
 2132           if (DEBUG_BUFFER) {
 2133               System.out.print(")skipDeclSpaces: ");
 2134               //XMLEntityManager.print(fCurrentEntity);
 2135               System.out.println(" -> false");
 2136           }
 2137           return false;
 2138   
 2139       } // skipDeclSpaces():boolean
 2140   
 2141   
 2142   } // class XMLEntityScanner

Save This Page
Home » openjdk-7 » com.sun.org.apache.xerces.internal » impl » [javadoc | source]