Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl;
   19   
   20   import java.io.EOFException;
   21   import java.io.IOException;
   22   import java.util.Locale;
   23   
   24   import org.apache.xerces.impl.io.UCSReader;
   25   import org.apache.xerces.impl.msg.XMLMessageFormatter;
   26   import org.apache.xerces.util.SymbolTable;
   27   import org.apache.xerces.util.XMLChar;
   28   import org.apache.xerces.util.XMLStringBuffer;
   29   import org.apache.xerces.xni.QName;
   30   import org.apache.xerces.xni.XMLLocator;
   31   import org.apache.xerces.xni.XMLString;
   32   
   33   /**
   34    * Implements the entity scanner methods.
   35    *
   36    * @xerces.internal
   37    * 
   38    * @author Andy Clark, IBM
   39    * @author Neil Graham, IBM
   40    * @version $Id: XMLEntityScanner.java 568411 2007-08-22 04:34:13Z mrglavas $
   41    */
   42   public class XMLEntityScanner implements XMLLocator {
   43   
   44       // constants
   45       private static final boolean DEBUG_ENCODINGS = false; // when true, setEncoding() traces its decisions to System.out
   46       private static final boolean DEBUG_BUFFER = false; // when true, the peek/scan methods print the current entity on entry and exit
   47       
   48       /**
   49        * To signal the end of the document entity, this exception will be thrown.
             * A single shared instance is kept in this constant; the place where it is
             * thrown is not visible in this part of the file.
   50        */
   51       private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
   52           private static final long serialVersionUID = 980337771224675268L;
                // Returns this instance directly instead of delegating to the superclass,
                // so no stack trace is recorded for the shared exception object.
   53           public Throwable fillInStackTrace() {
   54               return this;
   55           }
   56       };
   57   
   58       //
   59       // Data
   60       //
   61   
   62       private XMLEntityManager fEntityManager = null; // not referenced in the visible part of this file
   63       protected XMLEntityManager.ScannedEntity fCurrentEntity = null; // entity whose ch/position/count buffer and line/column counters the scan methods read and update
   64   
   65       protected SymbolTable fSymbolTable = null; // the name-scanning methods add every returned name to this table
   66   
   67       protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; // not used in the visible part of this file; presumably the read buffer size -- TODO confirm
   68   
   69       /**
   70        * Error reporter. This property identifier is:
   71        * http://apache.org/xml/properties/internal/error-reporter
             * scanQName reports the fatal "IllegalQName" error through this reporter.
   72        */
   73       protected XMLErrorReporter fErrorReporter;
   74       //
   75       // Constructors
   76       //
   77   
   78       /** Default constructor. */
   79       public XMLEntityScanner() {
                // Nothing to do here: every field keeps the initial value given at its declaration.
   80       } // <init>()
   81   
   82       //
   83       // XMLEntityScanner methods
   84       //
   85   
   86       /**
   87        * Returns the base system identifier of the currently scanned
   88        * entity, or null if none is available.
   89        */
   90       public final String getBaseSystemId() {
   91           return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
   92       } // getBaseSystemId():String
   93   
   94       /**
   95        * Sets the encoding of the scanner. This method is used by the
   96        * scanners if the XMLDecl or TextDecl line contains an encoding
   97        * pseudo-attribute.
   98        * <p>
   99        * <strong>Note:</strong> The underlying character reader on the
  100        * current entity will be changed to accommodate the new encoding.
  101        * However, the new encoding is ignored if the current reader was
  102        * not constructed from an input stream (e.g. an external entity
  103        * that is resolved directly to the appropriate java.io.Reader
  104        * object).
  105        *
  106        * @param encoding The IANA encoding name of the new encoding.
  107        *
  108        * @throws IOException Thrown if the new encoding is not supported.
  109        *
  110        * @see org.apache.xerces.util.EncodingMap
  111        */
  112       public final void setEncoding(String encoding) throws IOException {
  113   
  114           if (DEBUG_ENCODINGS) {
  115               System.out.println("$$$ setEncoding: "+encoding);
  116           }
  117   
  118           if (fCurrentEntity.stream != null) {
  119               // if the encoding is the same, don't change the reader and
  120               // re-use the original reader used by the OneCharReader
  121               // NOTE: Besides saving an object, this overcomes deficiencies
  122               //       in the UTF-16 reader supplied with the standard Java
  123               //       distribution (up to and including 1.3). The UTF-16
  124               //       decoder buffers 8K blocks even when only asked to read
  125               //       a single char! -Ac
  126               if (fCurrentEntity.encoding == null ||
  127                   !fCurrentEntity.encoding.equals(encoding)) {
  128                   // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
  129                   // and we know the endian-ness, we shouldn't change readers.
  130                   // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
  131                   // the endian-ness from the encoding we presently have.
  132                   if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
  133                       String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
  134                       if(ENCODING.equals("UTF-16")) return;
  135                       if(ENCODING.equals("ISO-10646-UCS-4")) {
  136                           if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  137                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
  138                           } else {
  139                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
  140                           }
  141                           return;
  142                       }
  143                       if(ENCODING.equals("ISO-10646-UCS-2")) {
  144                           if(fCurrentEntity.encoding.equals("UTF-16BE")) {
  145                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
  146                           } else {
  147                               fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
  148                           }
  149                           return;
  150                       }
  151                   }
  152                   // wrap a new reader around the input stream, changing
  153                   // the encoding
  154                   if (DEBUG_ENCODINGS) {
  155                       System.out.println("$$$ creating new reader from stream: "+
  156                                       fCurrentEntity.stream);
  157                   }
  158                   //fCurrentEntity.stream.reset();
  159                   fCurrentEntity.setReader(fCurrentEntity.stream, encoding, null);
  160                   fCurrentEntity.encoding = encoding;
  161               } else {
  162                   if (DEBUG_ENCODINGS)
  163                       System.out.println("$$$ reusing old reader on stream");
  164               }
  165           }
  166   
  167       } // setEncoding(String)
  168       
  169       /**
  170        * Sets the XML version. This method is used by the
  171        * scanners to report the value of the version pseudo-attribute
  172        * in an XML or text declaration.
  173        *
  174        * @param xmlVersion the XML version of the current entity
  175        */
  176       public final void setXMLVersion(String xmlVersion) {
  177           fCurrentEntity.xmlVersion = xmlVersion;
  178       } // setXMLVersion(String)
  179   
  180       /** Returns true if the current entity being scanned is external. */
  181       public final boolean isExternal() {
  182           return fCurrentEntity.isExternal();
  183       } // isExternal():boolean
  184   
  185       /**
  186        * Returns the next character on the input.
  187        * <p>
  188        * <strong>Note:</strong> The character is <em>not</em> consumed.
  189        *
  190        * @throws IOException  Thrown if i/o error occurs.
  191        * @throws EOFException Thrown on end of file.
  192        */
  193       public int peekChar() throws IOException {
  194           if (DEBUG_BUFFER) {
  195               System.out.print("(peekChar: ");
  196               XMLEntityManager.print(fCurrentEntity);
  197               System.out.println();
  198           }
  199   
  200           // load more characters, if needed
  201           if (fCurrentEntity.position == fCurrentEntity.count) {
  202               load(0, true);
  203           }
  204   
  205           // peek at character
  206           int c = fCurrentEntity.ch[fCurrentEntity.position];
  207   
  208           // return peeked character
  209           if (DEBUG_BUFFER) {
  210               System.out.print(")peekChar: ");
  211               XMLEntityManager.print(fCurrentEntity);
  212               if (fCurrentEntity.isExternal()) {
  213                   System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
  214               }
  215               else {
  216                   System.out.println(" -> '"+(char)c+"'");
  217               }
  218           }
  219           if (fCurrentEntity.isExternal()) {
  220               return c != '\r' ? c : '\n';
  221           }
  222           else {
  223               return c;
  224           }
  225   
  226       } // peekChar():int
  227   
  228       /**
  229        * Returns the next character on the input.
  230        * <p>
  231        * <strong>Note:</strong> The character is consumed.
  232        *
  233        * @throws IOException  Thrown if i/o error occurs.
  234        * @throws EOFException Thrown on end of file.
  235        */
  236       public int scanChar() throws IOException {
  237           if (DEBUG_BUFFER) {
  238               System.out.print("(scanChar: ");
  239               XMLEntityManager.print(fCurrentEntity);
  240               System.out.println();
  241           }
  242   
  243           // load more characters, if needed
  244           if (fCurrentEntity.position == fCurrentEntity.count) {
  245               load(0, true);
  246           }
  247   
  248           // scan character
  249           int c = fCurrentEntity.ch[fCurrentEntity.position++];
  250           boolean external = false;
  251           if (c == '\n' ||
  252               (c == '\r' && (external = fCurrentEntity.isExternal()))) {
  253               fCurrentEntity.lineNumber++;
  254               fCurrentEntity.columnNumber = 1;
  255               if (fCurrentEntity.position == fCurrentEntity.count) {
  256                   fCurrentEntity.ch[0] = (char)c;
  257                   load(1, false);
  258               }
  259               if (c == '\r' && external) {
  260                   if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
  261                       fCurrentEntity.position--;
  262                   }
  263                   c = '\n';
  264               }
  265           }
  266   
  267           // return character that was scanned
  268           if (DEBUG_BUFFER) {
  269               System.out.print(")scanChar: ");
  270               XMLEntityManager.print(fCurrentEntity);
  271               System.out.println(" -> '"+(char)c+"'");
  272           }
  273           fCurrentEntity.columnNumber++;
  274           return c;
  275   
  276       } // scanChar():int
  277   
  278       /**
  279        * Returns a string matching the NMTOKEN production appearing immediately
  280        * on the input as a symbol, or null if no NMTOKEN string is present.
  281        * <p>
  282        * <strong>Note:</strong> The NMTOKEN characters are consumed.
  283        * <p>
  284        * <strong>Note:</strong> The string returned must be a symbol. The
  285        * SymbolTable can be used for this purpose.
  286        *
  287        * @throws IOException  Thrown if i/o error occurs.
  288        * @throws EOFException Thrown on end of file.
  289        *
  290        * @see org.apache.xerces.util.SymbolTable
  291        * @see org.apache.xerces.util.XMLChar#isName
  292        */
  293       public String scanNmtoken() throws IOException {
  294           if (DEBUG_BUFFER) {
  295               System.out.print("(scanNmtoken: ");
  296               XMLEntityManager.print(fCurrentEntity);
  297               System.out.println();
  298           }
  299   
  300           // load more characters, if needed
  301           if (fCurrentEntity.position == fCurrentEntity.count) {
  302               load(0, true);
  303           }
  304   
  305           // scan nmtoken
  306           int offset = fCurrentEntity.position;
  307           while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  308               if (++fCurrentEntity.position == fCurrentEntity.count) {
  309                   int length = fCurrentEntity.position - offset;
  310                   if (length == fCurrentEntity.ch.length) {
  311                       // bad luck we have to resize our buffer
  312                       char[] tmp = new char[fCurrentEntity.ch.length << 1];
  313                       System.arraycopy(fCurrentEntity.ch, offset,
  314                                        tmp, 0, length);
  315                       fCurrentEntity.ch = tmp;
  316                   }
  317                   else {
  318                       System.arraycopy(fCurrentEntity.ch, offset,
  319                                        fCurrentEntity.ch, 0, length);
  320                   }
  321                   offset = 0;
  322                   if (load(length, false)) {
  323                       break;
  324                   }
  325               }
  326           }
  327           int length = fCurrentEntity.position - offset;
  328           fCurrentEntity.columnNumber += length;
  329   
  330           // return nmtoken
  331           String symbol = null;
  332           if (length > 0) {
  333               symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  334           }
  335           if (DEBUG_BUFFER) {
  336               System.out.print(")scanNmtoken: ");
  337               XMLEntityManager.print(fCurrentEntity);
  338               System.out.println(" -> "+String.valueOf(symbol));
  339           }
  340           return symbol;
  341   
  342       } // scanNmtoken():String
  343   
  344       /**
  345        * Returns a string matching the Name production appearing immediately
  346        * on the input as a symbol, or null if no Name string is present.
  347        * <p>
  348        * <strong>Note:</strong> The Name characters are consumed.
  349        * <p>
  350        * <strong>Note:</strong> The string returned must be a symbol. The
  351        * SymbolTable can be used for this purpose.
  352        *
  353        * @throws IOException  Thrown if i/o error occurs.
  354        * @throws EOFException Thrown on end of file.
  355        *
  356        * @see org.apache.xerces.util.SymbolTable
  357        * @see org.apache.xerces.util.XMLChar#isName
  358        * @see org.apache.xerces.util.XMLChar#isNameStart
  359        */
  360       public String scanName() throws IOException {
  361           if (DEBUG_BUFFER) {
  362               System.out.print("(scanName: ");
  363               XMLEntityManager.print(fCurrentEntity);
  364               System.out.println();
  365           }
  366   
  367           // load more characters, if needed
  368           if (fCurrentEntity.position == fCurrentEntity.count) {
  369               load(0, true);
  370           }
  371   
  372           // scan name
  373           int offset = fCurrentEntity.position;
  374           if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
  375               if (++fCurrentEntity.position == fCurrentEntity.count) {
  376                   fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  377                   offset = 0;
  378                   if (load(1, false)) {
  379                       fCurrentEntity.columnNumber++;
  380                       String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  381                       if (DEBUG_BUFFER) {
  382                           System.out.print(")scanName: ");
  383                           XMLEntityManager.print(fCurrentEntity);
  384                           System.out.println(" -> "+String.valueOf(symbol));
  385                       }
  386                       return symbol;
  387                   }
  388               }
  389               while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  390                   if (++fCurrentEntity.position == fCurrentEntity.count) {
  391                       int length = fCurrentEntity.position - offset;
  392                       if (length == fCurrentEntity.ch.length) {
  393                           // bad luck we have to resize our buffer
  394                           char[] tmp = new char[fCurrentEntity.ch.length << 1];
  395                           System.arraycopy(fCurrentEntity.ch, offset,
  396                                            tmp, 0, length);
  397                           fCurrentEntity.ch = tmp;
  398                       }
  399                       else {
  400                           System.arraycopy(fCurrentEntity.ch, offset,
  401                                            fCurrentEntity.ch, 0, length);
  402                       }
  403                       offset = 0;
  404                       if (load(length, false)) {
  405                           break;
  406                       }
  407                   }
  408               }
  409           }
  410           int length = fCurrentEntity.position - offset;
  411           fCurrentEntity.columnNumber += length;
  412   
  413           // return name
  414           String symbol = null;
  415           if (length > 0) {
  416               symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  417           }
  418           if (DEBUG_BUFFER) {
  419               System.out.print(")scanName: ");
  420               XMLEntityManager.print(fCurrentEntity);
  421               System.out.println(" -> "+String.valueOf(symbol));
  422           }
  423           return symbol;
  424   
  425       } // scanName():String
  426   
  427       /**
  428        * Returns a string matching the NCName production appearing immediately
  429        * on the input as a symbol, or null if no NCName string is present.
  430        * <p>
  431        * <strong>Note:</strong> The NCName characters are consumed.
  432        * <p>
  433        * <strong>Note:</strong> The string returned must be a symbol. The
  434        * SymbolTable can be used for this purpose.
  435        *
  436        * @throws IOException  Thrown if i/o error occurs.
  437        * @throws EOFException Thrown on end of file.
  438        *
  439        * @see org.apache.xerces.util.SymbolTable
  440        * @see org.apache.xerces.util.XMLChar#isNCName
  441        * @see org.apache.xerces.util.XMLChar#isNCNameStart
  442        */
  443       public String scanNCName() throws IOException {
  444           if (DEBUG_BUFFER) {
  445               System.out.print("(scanNCName: ");
  446               XMLEntityManager.print(fCurrentEntity);
  447               System.out.println();
  448           }
  449   
  450           // load more characters, if needed
  451           if (fCurrentEntity.position == fCurrentEntity.count) {
  452               load(0, true);
  453           }
  454   
  455           // scan name
  456           int offset = fCurrentEntity.position;
  457           if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
  458               if (++fCurrentEntity.position == fCurrentEntity.count) {
  459                   fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  460                   offset = 0;
  461                   if (load(1, false)) {
  462                       fCurrentEntity.columnNumber++;
  463                       String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  464                       if (DEBUG_BUFFER) {
  465                           System.out.print(")scanNCName: ");
  466                           XMLEntityManager.print(fCurrentEntity);
  467                           System.out.println(" -> "+String.valueOf(symbol));
  468                       }
  469                       return symbol;
  470                   }
  471               }
  472               while (XMLChar.isNCName(fCurrentEntity.ch[fCurrentEntity.position])) {
  473                   if (++fCurrentEntity.position == fCurrentEntity.count) {
  474                       int length = fCurrentEntity.position - offset;
  475                       if (length == fCurrentEntity.ch.length) {
  476                           // bad luck we have to resize our buffer
  477                           char[] tmp = new char[fCurrentEntity.ch.length << 1];
  478                           System.arraycopy(fCurrentEntity.ch, offset,
  479                                            tmp, 0, length);
  480                           fCurrentEntity.ch = tmp;
  481                       }
  482                       else {
  483                           System.arraycopy(fCurrentEntity.ch, offset,
  484                                            fCurrentEntity.ch, 0, length);
  485                       }
  486                       offset = 0;
  487                       if (load(length, false)) {
  488                           break;
  489                       }
  490                   }
  491               }
  492           }
  493           int length = fCurrentEntity.position - offset;
  494           fCurrentEntity.columnNumber += length;
  495   
  496           // return name
  497           String symbol = null;
  498           if (length > 0) {
  499               symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
  500           }
  501           if (DEBUG_BUFFER) {
  502               System.out.print(")scanNCName: ");
  503               XMLEntityManager.print(fCurrentEntity);
  504               System.out.println(" -> "+String.valueOf(symbol));
  505           }
  506           return symbol;
  507   
  508       } // scanNCName():String
  509   
  510       /**
  511        * Scans a qualified name from the input, setting the fields of the
  512        * QName structure appropriately.
  513        * <p>
  514        * <strong>Note:</strong> The qualified name characters are consumed.
  515        * <p>
  516        * <strong>Note:</strong> The strings used to set the values of the
  517        * QName structure must be symbols. The SymbolTable can be used for
  518        * this purpose.
  519        *
  520        * @param qname The qualified name structure to fill.
  521        *
  522        * @return Returns true if a qualified name appeared immediately on
  523        *         the input and was scanned, false otherwise.
  524        *
  525        * @throws IOException  Thrown if i/o error occurs.
  526        * @throws EOFException Thrown on end of file.
  527        *
  528        * @see org.apache.xerces.util.SymbolTable
  529        * @see org.apache.xerces.util.XMLChar#isName
  530        * @see org.apache.xerces.util.XMLChar#isNameStart
  531        */
  532       public boolean scanQName(QName qname) throws IOException {
  533           if (DEBUG_BUFFER) {
  534               System.out.print("(scanQName, "+qname+": ");
  535               XMLEntityManager.print(fCurrentEntity);
  536               System.out.println();
  537           }
  538   
  539           // load more characters, if needed
  540           if (fCurrentEntity.position == fCurrentEntity.count) {
  541               load(0, true);
  542           }
  543   
  544           // scan qualified name
  545           int offset = fCurrentEntity.position;
  546           if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
  547               if (++fCurrentEntity.position == fCurrentEntity.count) {
  548                   fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  549                   offset = 0;
  550                   if (load(1, false)) {
  551                       fCurrentEntity.columnNumber++;
  552                       String name =
  553                           fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  554                       qname.setValues(null, name, name, null);
  555                       if (DEBUG_BUFFER) {
  556                           System.out.print(")scanQName, "+qname+": ");
  557                           XMLEntityManager.print(fCurrentEntity);
  558                           System.out.println(" -> true");
  559                       }
  560                       return true;
  561                   }
  562               }
  563               int index = -1;
  564               while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
  565                   char c = fCurrentEntity.ch[fCurrentEntity.position];
  566   
  567                   if (c == ':') {
  568                       if (index != -1) {
  569                           break;
  570                       }
  571                       index = fCurrentEntity.position;
  572                   }
  573                   if (++fCurrentEntity.position == fCurrentEntity.count) {
  574                       int length = fCurrentEntity.position - offset;
  575                       if (length == fCurrentEntity.ch.length) {
  576                           // bad luck we have to resize our buffer
  577                           char[] tmp = new char[fCurrentEntity.ch.length << 1];
  578                           System.arraycopy(fCurrentEntity.ch, offset,
  579                                            tmp, 0, length);
  580                           fCurrentEntity.ch = tmp;
  581                       }
  582                       else {
  583                           System.arraycopy(fCurrentEntity.ch, offset,
  584                                            fCurrentEntity.ch, 0, length);
  585                       }
  586                       if (index != -1) {
  587                           index = index - offset;
  588                       }
  589                       offset = 0;
  590                       if (load(length, false)) {
  591                           break;
  592                       }
  593                   }
  594               }
  595               int length = fCurrentEntity.position - offset;
  596               fCurrentEntity.columnNumber += length;
  597               if (length > 0) {
  598                   String prefix = null;
  599                   String localpart = null;
  600                   String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
  601                                                           offset, length);
  602                   if (index != -1) {
  603                       int prefixLength = index - offset;
  604                       prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
  605                                                       offset, prefixLength);
  606                       int len = length - prefixLength - 1;
  607                       int startLocal = index +1;
  608                       if (!XMLChar.isNCNameStart(fCurrentEntity.ch[startLocal])){
  609                           fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  610                                                    "IllegalQName",
  611                                                     null,
  612                                                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
  613                       }
  614                       localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
  615                                                          startLocal, len);
  616   
  617                   }
  618                   else {
  619                       localpart = rawname;
  620                   }
  621                   qname.setValues(prefix, localpart, rawname, null);
  622                   if (DEBUG_BUFFER) {
  623                       System.out.print(")scanQName, "+qname+": ");
  624                       XMLEntityManager.print(fCurrentEntity);
  625                       System.out.println(" -> true");
  626                   }
  627                   return true;
  628               }
  629           }
  630   
  631           // no qualified name found
  632           if (DEBUG_BUFFER) {
  633               System.out.print(")scanQName, "+qname+": ");
  634               XMLEntityManager.print(fCurrentEntity);
  635               System.out.println(" -> false");
  636           }
  637           return false;
  638   
  639       } // scanQName(QName):boolean
  640   
  641       /**
  642        * Scans a range of parsed character data, setting the fields of the
  643        * XMLString structure, appropriately.
  644        * <p>
  645        * <strong>Note:</strong> The characters are consumed.
  646        * <p>
  647        * <strong>Note:</strong> This method does not guarantee to return
  648        * the longest run of parsed character data. This method may return
  649        * before markup due to reaching the end of the input buffer or any
  650        * other reason.
  651        * <p>
  652        * <strong>Note:</strong> The fields contained in the XMLString
  653        * structure are not guaranteed to remain valid upon subsequent calls
  654        * to the entity scanner. Therefore, the caller is responsible for
  655        * immediately using the returned character data or making a copy of
  656        * the character data.
  657        *
  658        * @param content The content structure to fill.
  659        *
  660        * @return Returns the next character on the input, if known. This
  661        *         value may be -1 but this does <em>not</em> designate
  662        *         end of file.
  663        *
  664        * @throws IOException  Thrown if i/o error occurs.
  665        * @throws EOFException Thrown on end of file.
  666        */
  667       public int scanContent(XMLString content) throws IOException {
                // Hands back one run of character data from the current entity's
                // buffer: first any leading line breaks (rewritten in place to
                // '\n'), then characters for which XMLChar.isContent() holds.
  668           if (DEBUG_BUFFER) {
  669               System.out.print("(scanContent: ");
  670               XMLEntityManager.print(fCurrentEntity);
  671               System.out.println();
  672           }
  673   
  674           // load more characters, if needed
                // NOTE(review): load() is defined outside this section; its first
                // argument appears to be the buffer index to fill from, and its
                // boolean result is used below as a "stop normalizing" signal --
                // confirm against its definition.
                // Case 1: nothing unread -> load from index 0.
                // Case 2: exactly one unread character -> copy it to ch[0], load
                //         behind it (offset 1) and restart scanning at index 0.
  675           if (fCurrentEntity.position == fCurrentEntity.count) {
  676               load(0, true);
  677           }
  678           else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  679               fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  680               load(1, false);
  681               fCurrentEntity.position = 0;
  682               fCurrentEntity.startPosition = 0;
  683           }
  684   
  685           // normalize newlines
                // offset   - index where the run handed back in 'content' starts
                // newlines - subtracted from the run length when columnNumber is
                //            advanced after the inner loop
                // external - '\r' is only treated as a line break when the
                //            current entity reports isExternal()
  686           int offset = fCurrentEntity.position;
  687           int c = fCurrentEntity.ch[offset];
  688           int newlines = 0;
  689           boolean external = fCurrentEntity.isExternal();
  690           if (c == '\n' || (c == '\r' && external)) {
  691               if (DEBUG_BUFFER) {
  692                   System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
  693                   XMLEntityManager.print(fCurrentEntity);
  694                   System.out.println();
  695               }
                    // consume consecutive line breaks; every one resets the
                    // column to 1 and bumps the line number
  696               do {
  697                   c = fCurrentEntity.ch[fCurrentEntity.position++];
  698                   if (c == '\r' && external) {
  699                       newlines++;
  700                       fCurrentEntity.lineNumber++;
  701                       fCurrentEntity.columnNumber = 1;
                            // the '\r' was the last buffered character: restart the
                            // run at index 0, keep 'newlines' slots at the front
                            // (they are overwritten with '\n' after the loop) and
                            // load more input behind them
  702                       if (fCurrentEntity.position == fCurrentEntity.count) {
  703                           offset = 0;
  704                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  705                           fCurrentEntity.position = newlines;
  706                           fCurrentEntity.startPosition = newlines;
  707                           if (load(newlines, false)) {
  708                               break;
  709                           }
  710                       }
                            // "\r\n" pair: consume the '\n' too and move the start
                            // of the run forward, so the pair contributes a single
                            // '\n' to the returned run
  711                       if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  712                           fCurrentEntity.position++;
  713                           offset++;
  714                       }
  715                       /*** NEWLINE NORMALIZATION ***/
                            // lone '\r': 'newlines' is incremented a second time
                            // here, on top of the increment at the top of this
                            // branch.
                            // NOTE(review): the reason for the double count is not
                            // evident from this method -- confirm.
  716                       else {
  717                           newlines++;
  718                       }
  719                   }
  720                   else if (c == '\n') {
  721                       newlines++;
  722                       fCurrentEntity.lineNumber++;
  723                       fCurrentEntity.columnNumber = 1;
                            // same end-of-buffer handling as in the '\r' branch
  724                       if (fCurrentEntity.position == fCurrentEntity.count) {
  725                           offset = 0;
  726                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  727                           fCurrentEntity.position = newlines;
  728                           fCurrentEntity.startPosition = newlines;
  729                           if (load(newlines, false)) {
  730                               break;
  731                           }
  732                       }
  733                   }
                        // not a line break: un-read the character and stop
  734                   else {
  735                       fCurrentEntity.position--;
  736                       break;
  737                   }
  738               } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                    // rewrite everything consumed so far as '\n' in the buffer
  739               for (int i = offset; i < fCurrentEntity.position; i++) {
  740                   fCurrentEntity.ch[i] = '\n';
  741               }
  742               int length = fCurrentEntity.position - offset;
                    // exactly one unread character is left: hand back only the
                    // newlines and report the next character as unknown (-1)
  743               if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  744                   content.setValues(fCurrentEntity.ch, offset, length);
  745                   if (DEBUG_BUFFER) {
  746                       System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  747                       XMLEntityManager.print(fCurrentEntity);
  748                       System.out.println();
  749                   }
  750                   return -1;
  751               }
  752               if (DEBUG_BUFFER) {
  753                   System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  754                   XMLEntityManager.print(fCurrentEntity);
  755                   System.out.println();
  756               }
  757           }
  758   
  759           // inner loop, scanning for content
                // stops in front of the first character rejected by
                // XMLChar.isContent(), or at the end of the buffered data
  760           while (fCurrentEntity.position < fCurrentEntity.count) {
  761               c = fCurrentEntity.ch[fCurrentEntity.position++];
  762               if (!XMLChar.isContent(c)) {
  763                   fCurrentEntity.position--;
  764                   break;
  765               }
  766           }
                // the run covers the normalized newlines plus the content just
                // scanned; 'newlines' is excluded from the column advance
  767           int length = fCurrentEntity.position - offset;
  768           fCurrentEntity.columnNumber += length - newlines;
  769           content.setValues(fCurrentEntity.ch, offset, length);
  770   
  771           // return next character
                // peeked, not consumed; an external '\r' is reported as '\n' and
                // -1 is returned when the buffer has been used up
  772           if (fCurrentEntity.position != fCurrentEntity.count) {
  773               c = fCurrentEntity.ch[fCurrentEntity.position];
  774               // REVISIT: Does this need to be updated to fix the
  775               //          #x0D ^#x0A newline normalization problem? -Ac
  776               if (c == '\r' && external) {
  777                   c = '\n';
  778               }
  779           }
  780           else {
  781               c = -1;
  782           }
  783           if (DEBUG_BUFFER) {
  784               System.out.print(")scanContent: ");
  785               XMLEntityManager.print(fCurrentEntity);
  786               System.out.println(" -> '"+(char)c+"'");
  787           }
  788           return c;
  789   
  790       } // scanContent(XMLString):int
  791   
  792       /**
  793        * Scans a range of attribute value data, setting the fields of the
  794        * XMLString structure, appropriately.
  795        * <p>
  796        * <strong>Note:</strong> The characters are consumed.
  797        * <p>
  798        * <strong>Note:</strong> This method does not guarantee to return
  799        * the longest run of attribute value data. This method may return
  800        * before the quote character due to reaching the end of the input
  801        * buffer or any other reason.
  802        * <p>
  803        * <strong>Note:</strong> The fields contained in the XMLString
  804        * structure are not guaranteed to remain valid upon subsequent calls
  805        * to the entity scanner. Therefore, the caller is responsible for
  806        * immediately using the returned character data or making a copy of
  807        * the character data.
  808        *
  809        * @param quote   The quote character that signifies the end of the
  810        *                attribute value data.
  811        * @param content The content structure to fill.
  812        *
  813        * @return Returns the next character on the input, if known. This
  814        *         value may be -1 but this does <em>not</em> designate
  815        *         end of file.
  816        *
  817        * @throws IOException  Thrown if i/o error occurs.
  818        * @throws EOFException Thrown on end of file.
  819        */
  820       public int scanLiteral(int quote, XMLString content)
  821           throws IOException {
                // Hands back one run of literal (attribute value) data: first
                // any leading line breaks (rewritten in place to '\n'), then
                // characters up to the quote, a '%', or a character rejected by
                // XMLChar.isContent().
  822           if (DEBUG_BUFFER) {
  823               System.out.print("(scanLiteral, '"+(char)quote+"': ");
  824               XMLEntityManager.print(fCurrentEntity);
  825               System.out.println();
  826           }
  827   
  828           // load more characters, if needed
                // NOTE(review): load() is defined outside this section; its first
                // argument appears to be the buffer index to fill from, and its
                // boolean result is used below as a "stop normalizing" signal --
                // confirm against its definition.
                // Case 1: nothing unread -> load from index 0.
                // Case 2: exactly one unread character -> copy it to ch[0], load
                //         behind it (offset 1) and restart scanning at index 0.
  829           if (fCurrentEntity.position == fCurrentEntity.count) {
  830               load(0, true);
  831           }
  832           else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  833               fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
  834               load(1, false);
  835               fCurrentEntity.position = 0;
  836               fCurrentEntity.startPosition = 0;
  837           }
  838   
  839           // normalize newlines
                // offset   - index where the run handed back in 'content' starts
                // newlines - subtracted from the run length when columnNumber is
                //            advanced after the scan loop
                // external - '\r' is only treated as a line break when the
                //            current entity reports isExternal()
  840           int offset = fCurrentEntity.position;
  841           int c = fCurrentEntity.ch[offset];
  842           int newlines = 0;
  843           boolean external = fCurrentEntity.isExternal();
  844           if (c == '\n' || (c == '\r' && external)) {
  845               if (DEBUG_BUFFER) {
  846                   System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
  847                   XMLEntityManager.print(fCurrentEntity);
  848                   System.out.println();
  849               }
                    // consume consecutive line breaks; every one resets the
                    // column to 1 and bumps the line number
  850               do {
  851                   c = fCurrentEntity.ch[fCurrentEntity.position++];
  852                   if (c == '\r' && external) {
  853                       newlines++;
  854                       fCurrentEntity.lineNumber++;
  855                       fCurrentEntity.columnNumber = 1;
                            // the '\r' was the last buffered character: restart the
                            // run at index 0, keep 'newlines' slots at the front
                            // (they are overwritten with '\n' after the loop) and
                            // load more input behind them
  856                       if (fCurrentEntity.position == fCurrentEntity.count) {
  857                           offset = 0;
  858                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  859                           fCurrentEntity.position = newlines;
  860                           fCurrentEntity.startPosition = newlines;
  861                           if (load(newlines, false)) {
  862                               break;
  863                           }
  864                       }
                            // "\r\n" pair: consume the '\n' too and move the start
                            // of the run forward, so the pair contributes a single
                            // '\n' to the returned run
  865                       if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  866                           fCurrentEntity.position++;
  867                           offset++;
  868                       }
  869                       /*** NEWLINE NORMALIZATION ***/
                            // lone '\r': 'newlines' is incremented a second time
                            // here, on top of the increment at the top of this
                            // branch.
                            // NOTE(review): the reason for the double count is not
                            // evident from this method -- confirm.
  870                       else {
  871                           newlines++;
  872                       }
  873                       /***/
  874                   }
  875                   else if (c == '\n') {
  876                       newlines++;
  877                       fCurrentEntity.lineNumber++;
  878                       fCurrentEntity.columnNumber = 1;
                            // same end-of-buffer handling as in the '\r' branch
  879                       if (fCurrentEntity.position == fCurrentEntity.count) {
  880                           offset = 0;
  881                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
  882                           fCurrentEntity.position = newlines;
  883                           fCurrentEntity.startPosition = newlines;
  884                           if (load(newlines, false)) {
  885                               break;
  886                           }
  887                       }
  888                   }
                        // not a line break: un-read the character and stop
  889                   else {
  890                       fCurrentEntity.position--;
  891                       break;
  892                   }
  893               } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                    // rewrite everything consumed so far as '\n' in the buffer
  894               for (int i = offset; i < fCurrentEntity.position; i++) {
  895                   fCurrentEntity.ch[i] = '\n';
  896               }
  897               int length = fCurrentEntity.position - offset;
                    // exactly one unread character is left: hand back only the
                    // newlines and report the next character as unknown (-1)
  898               if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  899                   content.setValues(fCurrentEntity.ch, offset, length);
  900                   if (DEBUG_BUFFER) {
  901                       System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  902                       XMLEntityManager.print(fCurrentEntity);
  903                       System.out.println();
  904                   }
  905                   return -1;
  906               }
  907               if (DEBUG_BUFFER) {
  908                   System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  909                   XMLEntityManager.print(fCurrentEntity);
  910                   System.out.println();
  911               }
  912           }
  913   
  914           // scan literal value
                // stops in front of: the quote character (only honoured when the
                // entity's 'literal' flag is clear or the entity is external), a
                // '%', or any character rejected by XMLChar.isContent()
  915           while (fCurrentEntity.position < fCurrentEntity.count) {
  916               c = fCurrentEntity.ch[fCurrentEntity.position++];
  917               if ((c == quote &&
  918                    (!fCurrentEntity.literal || external))
  919                   || c == '%' || !XMLChar.isContent(c)) {
  920                   fCurrentEntity.position--;
  921                   break;
  922               }
  923           }
                // the run covers the normalized newlines plus the literal data
                // just scanned; 'newlines' is excluded from the column advance
  924           int length = fCurrentEntity.position - offset;
  925           fCurrentEntity.columnNumber += length - newlines;
  926           content.setValues(fCurrentEntity.ch, offset, length);
  927   
  928           // return next character
                // peeked, not consumed; -1 when the buffer has been used up
  929           if (fCurrentEntity.position != fCurrentEntity.count) {
  930               c = fCurrentEntity.ch[fCurrentEntity.position];
  931               // NOTE: We don't want to accidentally signal the
  932               //       end of the literal if we're expanding an
  933               //       entity appearing in the literal. -Ac
  934               if (c == quote && fCurrentEntity.literal) {
  935                   c = -1;
  936               }
  937           }
  938           else {
  939               c = -1;
  940           }
  941           if (DEBUG_BUFFER) {
  942               System.out.print(")scanLiteral, '"+(char)quote+"': ");
  943               XMLEntityManager.print(fCurrentEntity);
  944               System.out.println(" -> '"+(char)c+"'");
  945           }
  946           return c;
  947   
  948       } // scanLiteral(int,XMLString):int
  949   
  950       /**
  951        * Scans a range of character data up to the specified delimiter,
  952        * setting the fields of the XMLString structure, appropriately.
  953        * <p>
  954        * <strong>Note:</strong> The characters are consumed.
  955        * <p>
  956        * <strong>Note:</strong> This assumes that the internal buffer is
  957        * at least the same size, or bigger, than the length of the delimiter
  958        * and that the delimiter contains at least one character.
  959        * <p>
  960        * <strong>Note:</strong> This method does not guarantee to return
  961        * the longest run of character data. This method may return before
  962        * the delimiter due to reaching the end of the input buffer or any
  963        * other reason.
  964        * <p>
  965        * <strong>Note:</strong> The fields contained in the XMLString
  966        * structure are not guaranteed to remain valid upon subsequent calls
  967        * to the entity scanner. Therefore, the caller is responsible for
  968        * immediately using the returned character data or making a copy of
  969        * the character data.
  970        *
  971        * @param delimiter The string that signifies the end of the character
  972        *                  data to be scanned.
  973        * @param buffer    The XMLStringBuffer to fill.
  974        *
  975        * @return Returns true if there is more data to scan, false otherwise.
  976        *
  977        * @throws IOException  Thrown if i/o error occurs.
  978        * @throws EOFException Thrown on end of file.
  979        */
  980       public boolean scanData(String delimiter, XMLStringBuffer buffer)
  981           throws IOException {
  982   
                // Appends one run of character data to 'buffer', stopping at the
                // delimiter, at a line break, at an invalid character or at the
                // end of the buffered input. The result is true until the
                // delimiter has been found and consumed.
                //
  983           // REVISIT: This method does not need to use a string buffer.
  984           //          The change would avoid the array copies and increase
  985           //          performance. -Ac
  986           //
  987           //          Currently, this method is called for scanning CDATA
  988           //          sections, comments,  and processing instruction data. 
  989           //          So if this code is updated to NOT buffer, the scanning
  990           //          code for comments and processing instructions will 
  991           //          need to be updated to do its own buffering. The code 
  992           //          for CDATA sections is safe as-is. -Ac
  993   
  994           boolean found = false;
  995           int delimLen = delimiter.length();
  996           char charAt0 = delimiter.charAt(0);
  997           boolean external = fCurrentEntity.isExternal();
  998           if (DEBUG_BUFFER) {
  999               System.out.print("(scanData: ");
 1000               XMLEntityManager.print(fCurrentEntity);
 1001               System.out.println();
 1002           }
 1003   
 1004           // load more characters, if needed
                // NOTE(review): load() is defined outside this section; its first
                // argument appears to be the buffer index to fill from, and its
                // boolean result is stored in 'bNextEntity' below -- confirm
                // the exact contract against its definition.
 1005   
 1006           if (fCurrentEntity.position == fCurrentEntity.count) {
 1007               load(0, true);
 1008           }
 1009   
 1010           boolean bNextEntity = false;
 1011   
                // fewer than delimLen unread characters are buffered: move the
                // unread tail to the front of the buffer and load behind it,
                // until enough characters are available or load() returns true
 1012           while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
 1013               && (!bNextEntity))
 1014           {
 1015             System.arraycopy(fCurrentEntity.ch,
 1016                              fCurrentEntity.position,
 1017                              fCurrentEntity.ch,
 1018                              0,
 1019                              fCurrentEntity.count - fCurrentEntity.position);
 1020   
 1021             bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
 1022             fCurrentEntity.position = 0;
 1023             fCurrentEntity.startPosition = 0;
 1024           }
 1025   
                // still not enough characters for a whole delimiter: append what
                // is left, mark the buffer as consumed, load again and report
                // that there is no more data to scan
 1026           if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
 1027               // something must be wrong with the input:  e.g., file ends in an unterminated comment
 1028               int length = fCurrentEntity.count - fCurrentEntity.position;
 1029               buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); 
                    // this branch is only reachable after the loop above has run,
                    // which leaves position and startPosition at 0, so 'count'
                    // equals 'length' here.
                    // NOTE(review): line breaks inside the appended tail are not
                    // reflected in lineNumber/columnNumber -- confirm acceptable.
 1030               fCurrentEntity.columnNumber += fCurrentEntity.count;
 1031               fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1032               fCurrentEntity.position = fCurrentEntity.count;
 1033               fCurrentEntity.startPosition = fCurrentEntity.count;
 1034               load(0,true);
 1035               return false;
 1036           }
 1037   
 1038           // normalize newlines
                // offset   - index where the run appended to 'buffer' starts
                // newlines - subtracted from the run length when columnNumber is
                //            advanced after the delimiter search
 1039           int offset = fCurrentEntity.position;
 1040           int c = fCurrentEntity.ch[offset];
 1041           int newlines = 0;
 1042           if (c == '\n' || (c == '\r' && external)) {
 1043               if (DEBUG_BUFFER) {
 1044                   System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
 1045                   XMLEntityManager.print(fCurrentEntity);
 1046                   System.out.println();
 1047               }
                    // consume consecutive line breaks; every one resets the
                    // column to 1 and bumps the line number
 1048               do {
 1049                   c = fCurrentEntity.ch[fCurrentEntity.position++];
 1050                   if (c == '\r' && external) {
 1051                       newlines++;
 1052                       fCurrentEntity.lineNumber++;
 1053                       fCurrentEntity.columnNumber = 1;
                            // the '\r' was the last buffered character: restart the
                            // run at index 0, keep 'newlines' slots at the front
                            // (they are overwritten with '\n' after the loop) and
                            // load more input behind them
 1054                       if (fCurrentEntity.position == fCurrentEntity.count) {
 1055                           offset = 0;
 1056                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1057                           fCurrentEntity.position = newlines;
 1058                           fCurrentEntity.startPosition = newlines;
 1059                           if (load(newlines, false)) {
 1060                               break;
 1061                           }
 1062                       }
                            // "\r\n" pair: consume the '\n' too and move the start
                            // of the run forward, so the pair contributes a single
                            // '\n' to the appended run
 1063                       if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
 1064                           fCurrentEntity.position++;
 1065                           offset++;
 1066                       }
 1067                       /*** NEWLINE NORMALIZATION ***/
                            // lone '\r': 'newlines' is incremented a second time
                            // here, on top of the increment at the top of this
                            // branch.
                            // NOTE(review): the reason for the double count is not
                            // evident from this method -- confirm.
 1068                       else {
 1069                           newlines++;
 1070                       }
 1071                   }
 1072                   else if (c == '\n') {
 1073                       newlines++;
 1074                       fCurrentEntity.lineNumber++;
 1075                       fCurrentEntity.columnNumber = 1;
 1076                       if (fCurrentEntity.position == fCurrentEntity.count) {
 1077                           offset = 0;
 1078                           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1079                           fCurrentEntity.position = newlines;
 1080                           fCurrentEntity.startPosition = newlines;
                                // NOTE(review): unlike the '\r' branch above, and
                                // unlike scanContent/scanLiteral, 'count' is also
                                // reset before load() here -- confirm whether the
                                // asymmetry is intended.
 1081                           fCurrentEntity.count = newlines;
 1082                           if (load(newlines, false)) {
 1083                               break;
 1084                           }
 1085                       }
 1086                   }
                        // not a line break: un-read the character and stop
 1087                   else {
 1088                       fCurrentEntity.position--;
 1089                       break;
 1090                   }
 1091               } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                    // rewrite everything consumed so far as '\n' in the buffer
 1092               for (int i = offset; i < fCurrentEntity.position; i++) {
 1093                   fCurrentEntity.ch[i] = '\n';
 1094               }
 1095               int length = fCurrentEntity.position - offset;
                    // exactly one unread character is left: append only the
                    // newlines and tell the caller to keep scanning
 1096               if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1097                   buffer.append(fCurrentEntity.ch, offset, length);
 1098                   if (DEBUG_BUFFER) {
 1099                       System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 1100                       XMLEntityManager.print(fCurrentEntity);
 1101                       System.out.println();
 1102                   }
 1103                   return true;
 1104               }
 1105               if (DEBUG_BUFFER) {
 1106                   System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
 1107                   XMLEntityManager.print(fCurrentEntity);
 1108                   System.out.println();
 1109               }
 1110           }
 1111   
 1112           // iterate over buffer looking for delimiter
 1113           OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
 1114               c = fCurrentEntity.ch[fCurrentEntity.position++];
 1115               if (c == charAt0) {
 1116                   // looks like we just hit the delimiter
 1117                   int delimOffset = fCurrentEntity.position - 1;
 1118                   for (int i = 1; i < delimLen; i++) {
                            // buffer ran out in the middle of a candidate match:
                            // rewind to the candidate's first character and leave
                            // it unread
 1119                       if (fCurrentEntity.position == fCurrentEntity.count) {
 1120                           fCurrentEntity.position -= i;
 1121                           break OUTER;
 1122                       }
 1123                       c = fCurrentEntity.ch[fCurrentEntity.position++];
                            // mismatch: un-read only the mismatching character; the
                            // part that did match stays in the run as data
 1124                       if (delimiter.charAt(i) != c) {
 1125                           fCurrentEntity.position--;
 1126                           break;
 1127                       }
 1128                   }
                        // position advanced over all delimLen characters, i.e. the
                        // whole delimiter matched
 1129                   if (fCurrentEntity.position == delimOffset + delimLen) {
 1130                       found = true;
 1131                       break;
 1132                   }
 1133               }
                    // line break: leave it unread and end this run (the newline
                    // block at the top of this method handles a leading line
                    // break on entry)
 1134               else if (c == '\n' || (external && c == '\r')) {
 1135                   fCurrentEntity.position--;
 1136                   break;
 1137               }
                    // invalid character: leave it unread, append what was scanned
                    // so far and return immediately
 1138               else if (XMLChar.isInvalid(c)) {
 1139                   fCurrentEntity.position--;
 1140                   int length = fCurrentEntity.position - offset;
 1141                   fCurrentEntity.columnNumber += length - newlines;
 1142                   buffer.append(fCurrentEntity.ch, offset, length); 
 1143                   return true;
 1144               }
 1145           }
                // the column advance includes a consumed delimiter, but the
                // delimiter itself is not appended to 'buffer'
 1146           int length = fCurrentEntity.position - offset;
 1147           fCurrentEntity.columnNumber += length - newlines;
 1148           if (found) {
 1149               length -= delimLen;
 1150           }
 1151           buffer.append (fCurrentEntity.ch, offset, length);
 1152   
 1153           // return true while the delimiter has not been found (more data to scan)
 1154           if (DEBUG_BUFFER) {
 1155               System.out.print(")scanData: ");
 1156               XMLEntityManager.print(fCurrentEntity);
 1157               System.out.println(" -> " + !found);
 1158           }
 1159           return !found;
 1160   
 1161       } // scanData(String,XMLStringBuffer):boolean
 1162   
 1163       /**
 1164        * Skips a character appearing immediately on the input.
 1165        * <p>
 1166        * <strong>Note:</strong> The character is consumed only if it matches
 1167        * the specified character.
 1168        *
 1169        * @param c The character to skip.
 1170        *
 1171        * @return Returns true if the character was skipped.
 1172        *
 1173        * @throws IOException  Thrown if i/o error occurs.
 1174        * @throws EOFException Thrown on end of file.
 1175        */
 1176       public boolean skipChar(int c) throws IOException {
 1177           if (DEBUG_BUFFER) {
 1178               System.out.print("(skipChar, '"+(char)c+"': ");
 1179               XMLEntityManager.print(fCurrentEntity);
 1180               System.out.println();
 1181           }
 1182   
 1183           // load more characters, if needed
 1184           if (fCurrentEntity.position == fCurrentEntity.count) {
 1185               load(0, true);
 1186           }
 1187   
 1188           // skip character
 1189           int cc = fCurrentEntity.ch[fCurrentEntity.position];
 1190           if (cc == c) {
 1191               fCurrentEntity.position++;
 1192               if (c == '\n') {
 1193                   fCurrentEntity.lineNumber++;
 1194                   fCurrentEntity.columnNumber = 1;
 1195               }
 1196               else {
 1197                   fCurrentEntity.columnNumber++;
 1198               }
 1199               if (DEBUG_BUFFER) {
 1200                   System.out.print(")skipChar, '"+(char)c+"': ");
 1201                   XMLEntityManager.print(fCurrentEntity);
 1202                   System.out.println(" -> true");
 1203               }
 1204               return true;
 1205           }
 1206           else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
 1207               // handle newlines
 1208               if (fCurrentEntity.position == fCurrentEntity.count) {
 1209                   fCurrentEntity.ch[0] = (char)cc;
 1210                   load(1, false);
 1211               }
 1212               fCurrentEntity.position++;
 1213               if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
 1214                   fCurrentEntity.position++;
 1215               }
 1216               fCurrentEntity.lineNumber++;
 1217               fCurrentEntity.columnNumber = 1;
 1218               if (DEBUG_BUFFER) {
 1219                   System.out.print(")skipChar, '"+(char)c+"': ");
 1220                   XMLEntityManager.print(fCurrentEntity);
 1221                   System.out.println(" -> true");
 1222               }
 1223               return true;
 1224           }
 1225   
 1226           // character was not skipped
 1227           if (DEBUG_BUFFER) {
 1228               System.out.print(")skipChar, '"+(char)c+"': ");
 1229               XMLEntityManager.print(fCurrentEntity);
 1230               System.out.println(" -> false");
 1231           }
 1232           return false;
 1233   
 1234       } // skipChar(int):boolean
 1235   
 1236       /**
 1237        * Skips space characters appearing immediately on the input.
 1238        * <p>
 1239        * <strong>Note:</strong> The characters are consumed only if they are
 1240        * space characters.
 1241        *
 1242        * @return Returns true if at least one space character was skipped.
 1243        *
 1244        * @throws IOException  Thrown if i/o error occurs.
 1245        * @throws EOFException Thrown on end of file.
 1246        *
 1247        * @see org.apache.xerces.util.XMLChar#isSpace
 1248        */
 1249       public boolean skipSpaces() throws IOException {
 1250           if (DEBUG_BUFFER) {
 1251               System.out.print("(skipSpaces: ");
 1252               XMLEntityManager.print(fCurrentEntity);
 1253               System.out.println();
 1254           }
 1255   
 1256           // load more characters, if needed
 1257           if (fCurrentEntity.position == fCurrentEntity.count) {
 1258               load(0, true);
 1259           }
 1260   
 1261           // skip spaces
 1262           int c = fCurrentEntity.ch[fCurrentEntity.position];
 1263           if (XMLChar.isSpace(c)) {
 1264               boolean external = fCurrentEntity.isExternal();
 1265               do {
 1266                   boolean entityChanged = false;
 1267                   // handle newlines
 1268                   if (c == '\n' || (external && c == '\r')) {
 1269                       fCurrentEntity.lineNumber++;
 1270                       fCurrentEntity.columnNumber = 1;
 1271                       if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1272                           fCurrentEntity.ch[0] = (char)c;
 1273                           entityChanged = load(1, true);
 1274                           if (!entityChanged) {
 1275                               // the load change the position to be 1,
 1276                               // need to restore it when entity not changed
 1277                               fCurrentEntity.position = 0;
 1278                               fCurrentEntity.startPosition = 0;
 1279                           }
 1280                       }
 1281                       if (c == '\r' && external) {
 1282                           // REVISIT: Does this need to be updated to fix the
 1283                           //          #x0D ^#x0A newline normalization problem? -Ac
 1284                           if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
 1285                               fCurrentEntity.position--;
 1286                           }
 1287                       }
 1288                       /*** NEWLINE NORMALIZATION ***
 1289                       else {
 1290                           if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
 1291                               && external) {
 1292                               fCurrentEntity.position++;
 1293                           }
 1294                       }
 1295                       /***/
 1296                   }
 1297                   else {
 1298                       fCurrentEntity.columnNumber++;
 1299                   }
 1300                   // load more characters, if needed
 1301                   if (!entityChanged)
 1302                       fCurrentEntity.position++;
 1303                   if (fCurrentEntity.position == fCurrentEntity.count) {
 1304                       load(0, true);
 1305                   }
 1306               } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
 1307               if (DEBUG_BUFFER) {
 1308                   System.out.print(")skipSpaces: ");
 1309                   XMLEntityManager.print(fCurrentEntity);
 1310                   System.out.println(" -> true");
 1311               }
 1312               return true;
 1313           }
 1314   
 1315           // no spaces were found
 1316           if (DEBUG_BUFFER) {
 1317               System.out.print(")skipSpaces: ");
 1318               XMLEntityManager.print(fCurrentEntity);
 1319               System.out.println(" -> false");
 1320           }
 1321           return false;
 1322   
 1323       } // skipSpaces():boolean
 1324   
 1325       /**
 1326        * Skips space characters appearing immediately on the input that would
 1327        * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 
 1328        * normalization is performed. This is useful when scanning structures 
 1329        * such as the XMLDecl and TextDecl that can only contain US-ASCII 
 1330        * characters.
 1331        * <p>
 1332        * <strong>Note:</strong> The characters are consumed only if they would
 1333        * match non-terminal S before end of line normalization is performed.
 1334        *
 1335        * @return Returns true if at least one space character was skipped.
 1336        *
 1337        * @throws IOException  Thrown if i/o error occurs.
 1338        * @throws EOFException Thrown on end of file.
 1339        *
 1340        * @see org.apache.xerces.util.XMLChar#isSpace
 1341        */
 1342       public final boolean skipDeclSpaces() throws IOException {
                // Consumes a run of whitespace starting at the current position,
                // updating the entity's line/column counters along the way, and
                // reports whether at least one character was consumed.
 1343           if (DEBUG_BUFFER) {
 1344               System.out.print("(skipDeclSpaces: ");
 1345               XMLEntityManager.print(fCurrentEntity);
 1346               System.out.println();
 1347           }
 1348   
 1349           // load more characters, if needed
 1350           if (fCurrentEntity.position == fCurrentEntity.count) {
 1351               load(0, true);
 1352           }
 1353   
 1354           // skip spaces
 1355           int c = fCurrentEntity.ch[fCurrentEntity.position];
 1356           if (XMLChar.isSpace(c)) {
                    // evaluated once, before the loop starts
 1357               boolean external = fCurrentEntity.isExternal();
 1358               do {
 1359                   boolean entityChanged = false;
 1360                   // handle newlines ('\r' only counts as one in an external entity)
 1361                   if (c == '\n' || (external && c == '\r')) {
 1362                       fCurrentEntity.lineNumber++;
 1363                       fCurrentEntity.columnNumber = 1;
                            // the newline is the last buffered character: stash it in
                            // slot 0 and refill from offset 1 so that the character
                            // following it can be inspected below
 1364                       if (fCurrentEntity.position == fCurrentEntity.count - 1) {
 1365                           fCurrentEntity.ch[0] = (char)c;
 1366                           entityChanged = load(1, true);
 1367                           if (!entityChanged) {
 1368                               // load() moved the position to the load offset (1);
 1369                               // point it back at the stashed character when the
                                    // entity did not change
 1370                               fCurrentEntity.position = 0;
 1371                               fCurrentEntity.startPosition = 0;
 1372                           }
 1373                       }
 1374                       if (c == '\r' && external) {
 1375                           // REVISIT: Does this need to be updated to fix the
 1376                           //          #x0D ^#x0A newline normalization problem? -Ac
                                // peek at the next character: stay on it if it is the
                                // '\n' of a "\r\n" pair (so the pair counts as a single
                                // line break), otherwise step back onto the '\r'
 1377                           if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
 1378                               fCurrentEntity.position--;
 1379                           }
 1380                       }
 1381                       /*** NEWLINE NORMALIZATION ***
 1382                       else {
 1383                           if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
 1384                               && external) {
 1385                               fCurrentEntity.position++;
 1386                           }
 1387                       }
 1388                       /***/
 1389                   }
 1390                   else {
                            // any other whitespace character just advances the column
 1391                       fCurrentEntity.columnNumber++;
 1392                   }
 1393                   // load more characters, if needed
                        // (the character just handled is stepped over first, unless
                        // load() above reported an entity change)
 1394                   if (!entityChanged)
 1395                       fCurrentEntity.position++;
 1396                   if (fCurrentEntity.position == fCurrentEntity.count) {
 1397                       load(0, true);
 1398                   }
                    // keep going for as long as the next character is whitespace
 1399               } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
 1400               if (DEBUG_BUFFER) {
 1401                   System.out.print(")skipDeclSpaces: ");
 1402                   XMLEntityManager.print(fCurrentEntity);
 1403                   System.out.println(" -> true");
 1404               }
 1405               return true;
 1406           }
 1407   
 1408           // no spaces were found
 1409           if (DEBUG_BUFFER) {
 1410               System.out.print(")skipDeclSpaces: ");
 1411               XMLEntityManager.print(fCurrentEntity);
 1412               System.out.println(" -> false");
 1413           }
 1414           return false;
 1415   
 1416       } // skipDeclSpaces():boolean
 1417   
 1418       /**
 1419        * Skips the specified string appearing immediately on the input.
 1420        * <p>
 1421        * <strong>Note:</strong> The characters are consumed only if the
 1422        * entire string matches the input at the current position.
 1423        *
 1424        * @param s The string to skip.
 1425        *
 1426        * @return Returns true if the string was skipped.
 1427        *
 1428        * @throws IOException  Thrown if i/o error occurs.
 1429        * @throws EOFException Thrown on end of file.
 1430        */
 1431       public boolean skipString(String s) throws IOException {
                // Tries to match s character by character against the buffered
                // input; on any failure the read position is rewound so that
                // nothing appears to have been consumed.
 1432           if (DEBUG_BUFFER) {
 1433               System.out.print("(skipString, \""+s+"\": ");
 1434               XMLEntityManager.print(fCurrentEntity);
 1435               System.out.println();
 1436           }
 1437   
 1438           // load more characters, if needed
 1439           if (fCurrentEntity.position == fCurrentEntity.count) {
 1440               load(0, true);
 1441           }
 1442   
 1443           // skip string
 1444           final int length = s.length();
 1445           for (int i = 0; i < length; i++) {
 1446               char c = fCurrentEntity.ch[fCurrentEntity.position++];
 1447               if (c != s.charAt(i)) {
                        // mismatch: undo the i + 1 characters read so far
 1448                   fCurrentEntity.position -= i + 1;
 1449                   if (DEBUG_BUFFER) {
 1450                       System.out.print(")skipString, \""+s+"\": ");
 1451                       XMLEntityManager.print(fCurrentEntity);
 1452                       System.out.println(" -> false");
 1453                   }
 1454                   return false;
 1455               }
                    // buffer exhausted with part of s still unmatched: move the
                    // i + 1 characters matched so far to the front of the buffer
                    // and refill behind them
 1456               if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
 1457                   System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
 1458                   // REVISIT: Can a string to be skipped cross an
 1459                   //          entity boundary? -Ac
                        // load() returns true when the entity's input is exhausted
                        // (the entity is not switched because changeEntity is false);
                        // it leaves position and startPosition at i + 1, so pull both
                        // back to the start of the preserved characters and give up
 1460                   if (load(i + 1, false)) {
 1461                       fCurrentEntity.startPosition -= i + 1; 
 1462                       fCurrentEntity.position -= i + 1;
 1463                       if (DEBUG_BUFFER) {
 1464                           System.out.print(")skipString, \""+s+"\": ");
 1465                           XMLEntityManager.print(fCurrentEntity);
 1466                           System.out.println(" -> false");
 1467                       }
 1468                       return false;
 1469                   }
 1470               }
 1471           }
 1472           if (DEBUG_BUFFER) {
 1473               System.out.print(")skipString, \""+s+"\": ");
 1474               XMLEntityManager.print(fCurrentEntity);
 1475               System.out.println(" -> true");
 1476           }
                // full match: the column advances by the length of s
 1477           fCurrentEntity.columnNumber += length;
 1478           return true;
 1479   
 1480       } // skipString(String):boolean
 1481   
 1482       //
 1483       // Locator methods
 1484       //
 1485   
 1486       /**
 1487        * Return the public identifier for the current document event.
 1488        * <p>
 1489        * The return value is the public identifier of the document
 1490        * entity or of the external parsed entity in which the markup
 1491        * triggering the event appears.
 1492        *
 1493        * @return A string containing the public identifier, or
 1494        *         null if none is available.
 1495        */
 1496       public final String getPublicId() {
 1497           return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
 1498       } // getPublicId():String
 1499   
 1500       /**
 1501        * Return the expanded system identifier for the current document event.
 1502        * <p>
 1503        * The return value is the expanded system identifier of the document
 1504        * entity or of the external parsed entity in which the markup
 1505        * triggering the event appears.
 1506        * <p>
 1507        * If the system identifier is a URL, the parser must resolve it
 1508        * fully before passing it to the application.
 1509        *
 1510        * @return A string containing the expanded system identifier, or null
 1511        *         if none is available.
 1512        */
 1513       public final String getExpandedSystemId() {
 1514           if (fCurrentEntity != null) {
 1515               if (fCurrentEntity.entityLocation != null &&
 1516                       fCurrentEntity.entityLocation.getExpandedSystemId() != null ) {
 1517                   return fCurrentEntity.entityLocation.getExpandedSystemId();
 1518               }
 1519               else {
 1520                   // get the current entity to return something appropriate:
 1521                   return fCurrentEntity.getExpandedSystemId();
 1522               }
 1523           }
 1524           return null;
 1525       } // getExpandedSystemId():String
 1526   
 1527       /**
 1528        * Return the literal system identifier for the current document event.
 1529        * <p>
 1530        * The return value is the literal system identifier of the document
 1531        * entity or of the external parsed entity in which the markup
 1532        * triggering the event appears.
 1533        * <p>
 1534        * @return A string containing the literal system identifier, or null
 1535        *         if none is available.
 1536        */
 1537       public final String getLiteralSystemId() {
 1538           if (fCurrentEntity != null) {
 1539               if (fCurrentEntity.entityLocation != null &&
 1540                       fCurrentEntity.entityLocation.getLiteralSystemId() != null ) {
 1541                   return fCurrentEntity.entityLocation.getLiteralSystemId();
 1542               }
 1543               else {
 1544                   // get the current entity to do it:
 1545                   return fCurrentEntity.getLiteralSystemId();
 1546               }
 1547           }
 1548           return null;
 1549       } // getLiteralSystemId():String
 1550   
 1551       /**
 1552        * Returns the line number where the current document event ends.
 1553        * <p>
 1554        * <strong>Warning:</strong> The return value from the method
 1555        * is intended only as an approximation for the sake of error
 1556        * reporting; it is not intended to provide sufficient information
 1557        * to edit the character content of the original XML document.
 1558        * <p>
 1559        * The return value is an approximation of the line number
 1560        * in the document entity or external parsed entity where the
 1561        * markup triggering the event appears.
 1562        * <p>
 1563        * If possible, the line position of the first character after the 
 1564        * text associated with the document event should be provided.
 1565        * The first line in the document is line 1.
 1566        *
 1567        * @return The line number, or -1 if none is available.
 1568        */
 1569       public final int getLineNumber() {
 1570           if (fCurrentEntity != null) {
 1571               if (fCurrentEntity.isExternal()) {
 1572                   return fCurrentEntity.lineNumber;
 1573               }
 1574               else {
 1575                   // ask the current entity to return something appropriate:
 1576                   return fCurrentEntity.getLineNumber();
 1577               }
 1578           }
 1579   
 1580           return -1;
 1581   
 1582       } // getLineNumber():int
 1583   
 1584       /**
 1585        * Returns the column number where the current document event ends.
 1586        * <p>
 1587        * <strong>Warning:</strong> The return value from the method
 1588        * is intended only as an approximation for the sake of error
 1589        * reporting; it is not intended to provide sufficient information
 1590        * to edit the character content of the original XML document.
 1591        * <p>
 1592        * The return value is an approximation of the column number
 1593        * in the document entity or external parsed entity where the
 1594        * markup triggering the event appears.
 1595        * <p>
 1596        * If possible, the line position of the first character after the 
 1597        * text associated with the document event should be provided.
 1598        * The first column in each line is column 1.
 1599        *
 1600        * @return The column number, or -1 if none is available.
 1601        */
 1602       public final int getColumnNumber() {
 1603           if (fCurrentEntity != null) {
 1604               if (fCurrentEntity.isExternal()) {
 1605                   return fCurrentEntity.columnNumber;
 1606               }
 1607               else {
 1608                   // ask current entity to find appropriate column number
 1609                   return fCurrentEntity.getColumnNumber();
 1610               }
 1611           }
 1612   
 1613           return -1;
 1614       } // getColumnNumber():int
 1615       
 1616       /**
 1617        * Returns the character offset where the current document event ends.
 1618        * <p>
 1619        * <strong>Warning:</strong> The return value from the method
 1620        * is intended only as an approximation for the sake of error
 1621        * reporting; it is not intended to provide sufficient information
 1622        * to edit the character content of the original XML document.
 1623        * <p>
 1624        * The return value is an approximation of the character offset
 1625        * in the document entity or external parsed entity where the
 1626        * markup triggering the event appears.
 1627        * <p>
 1628        * If possible, the character offset of the first character after the 
 1629        * text associated with the document event should be provided.
 1630        *
 1631        * @return The character offset, or -1 if none is available.
 1632        */
 1633       public final int getCharacterOffset() {
 1634           if (fCurrentEntity != null) {
 1635               if (fCurrentEntity.isExternal()) {
 1636                   return fCurrentEntity.baseCharOffset + (fCurrentEntity.position - fCurrentEntity.startPosition);
 1637               }
 1638               else {
 1639                   // ask current entity to find appropriate character offset
 1640                   return fCurrentEntity.getCharacterOffset();
 1641               }
 1642           }
 1643           
 1644           return -1;
 1645       } // getCharacterOffset():int
 1646       
 1647       /** 
 1648        * Returns the encoding of the current entity.  
 1649        * Note that, for a given entity, this value can only be
 1650        * considered final once the encoding declaration has been read (or once it
 1651        * has been determined that there is no such declaration) since, no encoding
 1652        * having been specified on the XMLInputSource, the parser
 1653        * will make an initial "guess" which could be in error. 
 1654        */
 1655       public final String getEncoding() {
 1656           if (fCurrentEntity != null) {
 1657               if (fCurrentEntity.isExternal()) {
 1658                   return fCurrentEntity.encoding;
 1659               }
 1660               else {
 1661                   // ask current entity to find appropriate encoding
 1662                   return fCurrentEntity.getEncoding();
 1663               }
 1664           }
 1665           return null;
 1666       } // getEncoding():String
 1667       
 1668       /**
 1669        * Returns the XML version of the current entity. This will normally be the
 1670        * value from the XML or text declaration or defaulted by the parser. Note that
 1671        * this value may be different than the version of the processing rules
 1672        * applied to the current entity. For instance, an XML 1.1 document may refer to
 1673        * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 
 1674        * document. Also note that, for a given entity, this value can only be considered
 1675        * final once the XML or text declaration has been read or once it has been
 1676        * determined that there is no such declaration.
 1677        */
 1678       public final String getXMLVersion() {
 1679           if (fCurrentEntity != null) {
 1680               if (fCurrentEntity.isExternal()) {
 1681                   return fCurrentEntity.xmlVersion;
 1682               }
 1683               else {
 1684                   // ask current entity to find the appropriate XML version
 1685                   return fCurrentEntity.getXMLVersion();
 1686               }
 1687           }
 1688           return null;
 1689       } // getXMLVersion():String
 1690       
 1691       // allow entity manager to tell us what the current entity is:
 1692       public final void setCurrentEntity(XMLEntityManager.ScannedEntity ent) {
 1693           fCurrentEntity = ent;
 1694       }
 1695   
 1696       // set buffer size:
 1697       public final void setBufferSize(int size) {
 1698           // REVISIT: Buffer size passed to entity scanner 
 1699           // was not being kept in synch with the actual size
 1700           // of the buffers in each scanned entity. If any
 1701           // of the buffers were actually resized, it was possible
 1702           // that the parser would throw an ArrayIndexOutOfBoundsException
 1703           // for documents which contained names which are longer than
 1704           // the current buffer size. Conceivably the buffer size passed
 1705           // to entity scanner could be used to determine a minimum size
 1706           // for resizing, if doubling its size is smaller than this
 1707           // minimum. -- mrglavas 
 1708           fBufferSize = size;
 1709       }
 1710   
 1711       // reset what little state we have...
 1712       public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
 1713                           XMLErrorReporter reporter) {
 1714           fCurrentEntity = null;
 1715           fSymbolTable = symbolTable;
 1716           fEntityManager = entityManager;
 1717           fErrorReporter = reporter;
 1718       }
 1719   
 1720       //
 1721       // Private methods
 1722       //
 1723   
 1724       /**
 1725        * Loads a chunk of text.
 1726        *
 1727        * @param offset       The offset into the character buffer to
 1728        *                     read the next batch of characters.
 1729        * @param changeEntity True if the load should change entities
 1730        *                     at the end of the entity, otherwise leave
 1731        *                     the current entity in place and the entity
 1732        *                     boundary will be signaled by the return
 1733        *                     value.
 1734        *
 1735        * @return Returns true if the entity changed as a result of this
 1736        *          load operation.
 1737        */
 1738       final boolean load(int offset, boolean changeEntity)
 1739           throws IOException {
 1740           if (DEBUG_BUFFER) {
 1741               System.out.print("(load, "+offset+": ");
 1742               XMLEntityManager.print(fCurrentEntity);
 1743               System.out.println();
 1744           }
 1745   
 1746           fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
 1747           // read characters
 1748           int length = fCurrentEntity.mayReadChunks?
 1749                   (fCurrentEntity.ch.length - offset):
 1750                   (XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE);
 1751           if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
 1752           int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
 1753           if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
 1754   
 1755           // reset count and position
 1756           boolean entityChanged = false;
 1757           if (count != -1) {
 1758               if (count != 0) {
 1759                   fCurrentEntity.count = count + offset;
 1760                   fCurrentEntity.position = offset;
 1761                   fCurrentEntity.startPosition = offset;
 1762               }
 1763           }
 1764   
 1765           // end of this entity
 1766           else {
 1767               fCurrentEntity.count = offset;
 1768               fCurrentEntity.position = offset;
 1769               fCurrentEntity.startPosition = offset;
 1770               entityChanged = true;
 1771               if (changeEntity) {
 1772                   fEntityManager.endEntity();
 1773                   if (fCurrentEntity == null) {
 1774                       throw END_OF_DOCUMENT_ENTITY;
 1775                   }
 1776                   // handle the trailing edges
 1777                   if (fCurrentEntity.position == fCurrentEntity.count) {
 1778                       load(0, true);
 1779                   }
 1780               }
 1781           }
 1782           if (DEBUG_BUFFER) {
 1783               System.out.print(")load, "+offset+": ");
 1784               XMLEntityManager.print(fCurrentEntity);
 1785               System.out.println();
 1786           }
 1787   
 1788           return entityChanged;
 1789   
 1790       } // load(int, boolean):boolean
 1791   
 1792   } // class XMLEntityScanner
 1793   

Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » [javadoc | source]