Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/apache/xerces/framework/XMLDocumentScanner.java


1   /*
2    * The Apache Software License, Version 1.1
3    *
4    *
5    * Copyright (c) 1999,2000 The Apache Software Foundation.  All rights 
6    * reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer. 
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution,
21   *    if any, must include the following acknowledgment:  
22   *       "This product includes software developed by the
23   *        Apache Software Foundation (http://www.apache.org/)."
24   *    Alternately, this acknowledgment may appear in the software itself,
25   *    if and wherever such third-party acknowledgments normally appear.
26   *
27   * 4. The names "Xerces" and "Apache Software Foundation" must
28   *    not be used to endorse or promote products derived from this
29   *    software without prior written permission. For written 
30   *    permission, please contact apache@apache.org.
31   *
32   * 5. Products derived from this software may not be called "Apache",
33   *    nor may "Apache" appear in their name, without prior written
34   *    permission of the Apache Software Foundation.
35   *
36   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   * SUCH DAMAGE.
48   * ====================================================================
49   *
50   * This software consists of voluntary contributions made by many
51   * individuals on behalf of the Apache Software Foundation and was
52   * originally based on software copyright (c) 1999, International
53   * Business Machines, Inc., http://www.apache.org.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  package org.apache.xerces.framework;
59  
60  import org.apache.xerces.readers.XMLEntityHandler;
61  import org.apache.xerces.readers.DefaultEntityHandler;
62  import org.apache.xerces.utils.ChunkyCharArray;
63  import org.apache.xerces.utils.QName;
64  import org.apache.xerces.utils.StringPool;
65  import org.apache.xerces.utils.XMLCharacterProperties;
66  import org.apache.xerces.utils.XMLMessages;
67  import org.apache.xerces.validators.common.GrammarResolver;
68  
69  import org.xml.sax.Locator;
70  import org.xml.sax.SAXParseException;
71  
72  /**
73   * This class recognizes most of the grammar for an XML processor.
74   * Additional support is provided by the XMLEntityHandler, via the
75   * XMLEntityReader instances it creates, which are used to process
76   * simple constructs like string literals and character data between
77   * markup.  The XMLDTDScanner class contains the remaining support
78   * for the grammar of DTD declarations.  When a &lt;!DOCTYPE ...&gt; is
79   * found in the document, the scanDoctypeDecl method will then be
80   * called and the XMLDocumentScanner subclass is responsible for
81   * "connecting" that method to the corresponding method provided
82   * by the XMLDTDScanner class.
83   *
84   * @version $Id: XMLDocumentScanner.java,v 1.3 2000/10/07 18:06:53 markd Exp $
85   */
86  public final class XMLDocumentScanner {
87      //
88      // Constants
89      //
90  
91      //
92      // These character arrays are used as parameters for calls to the
93      // XMLEntityHandler.EntityReader skippedString() method.  Some have
94      // package access for use by the inner dispatcher classes.
95      //
96  
97      //
98      // [19] CDStart ::= '<![CDATA['
99      //
100     static final char[] cdata_string = { '[','C','D','A','T','A','[' }; // holds "[CDATA[" only; the "<!" of the production is not part of the array
101     //
102     // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
103     // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
104     //
105     static final char[] xml_string = { 'x','m','l' }; // holds "xml" only; the "<?" of the productions is not part of the array
106     //
107     // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
108     //
109     private static final char[] version_string = { 'v','e','r','s','i','o','n' }; // the keyword "version"
110     //
111     // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
112     //                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
113     //
114     static final char[] doctype_string = { 'D','O','C','T','Y','P','E' }; // holds "DOCTYPE" only; the "<!" of the production is not part of the array
115     //
116     // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
117     //                 | ('"' ('yes' | 'no') '"'))
118     //
119     private static final char[] standalone_string = { 's','t','a','n','d','a','l','o','n','e' }; // the keyword "standalone"
120     //
121     // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
122     //
123     private static final char[] encoding_string = { 'e','n','c','o','d','i','n','g' }; // the keyword "encoding"
124 
125     /*
126      * Result codes, described by the original author as return values for "the EventHandler scanAttValue method". NOTE(review): the EventHandler interface declared below has no scanAttValue method - confirm where these codes are consumed.
127      */
128     public static final int
129         RESULT_SUCCESS          =  0,
130         RESULT_FAILURE          = -1,
131         RESULT_DUPLICATE_ATTR   = -2;
132 
133     /** Scanner states: the values held by the fScannerState field. */
134     static final int
135         SCANNER_STATE_XML_DECL                  =  0, // initial value of fScannerState, also assigned by reset()
136         SCANNER_STATE_START_OF_MARKUP           =  1,
137         SCANNER_STATE_COMMENT                   =  2,
138         SCANNER_STATE_PI                        =  3,
139         SCANNER_STATE_DOCTYPE                   =  4,
140         SCANNER_STATE_PROLOG                    =  5,
141         SCANNER_STATE_ROOT_ELEMENT              =  6,
142         SCANNER_STATE_CONTENT                   =  7,
143         SCANNER_STATE_REFERENCE                 =  8,
144         SCANNER_STATE_ATTRIBUTE_LIST            =  9,
145         SCANNER_STATE_ATTRIBUTE_NAME            = 10,
146         SCANNER_STATE_ATTRIBUTE_VALUE           = 11, // entered by scanAttValue() when it processes a value itself; consulted by readerChange()
147         SCANNER_STATE_TRAILING_MISC             = 12,
148         SCANNER_STATE_END_OF_INPUT              = 13, // tested by atEndOfInput(); restoreScannerState() never overwrites it
149         SCANNER_STATE_TERMINATED                = 14;
150 
151     //
152     // Instance Variables
153     //
154     /***/
155     // NOTE: Used by old implementation of scanElementType method. -Ac
156     private StringPool.CharArrayRange fCurrentElementCharArrayRange = null;
157     /***/
158     int fAttrListHandle = -1;
159     XMLAttrList fAttrList = null; // created from fStringPool in the constructor and again in reset()
160     GrammarResolver fGrammarResolver = null; // assigned by setGrammarResolver()
161     XMLDTDScanner fDTDScanner = null; // when non-null and fScanningDTD is true, readerChange() and endOfInput() are forwarded to it
162     boolean fNamespacesEnabled = false;
163     boolean fValidationEnabled = false;
164     QName fElementQName = new QName();
165     QName fAttributeQName = new QName();
166     QName fCurrentElementQName = new QName();
167     ScannerDispatcher fDispatcher = null; // current dispatcher; the constructor and reset() install an XMLDeclDispatcher
168     EventHandler fEventHandler = null; // assigned by setEventHandler()
169     XMLDocumentHandler.DTDHandler fDTDHandler = null; // assigned by setDTDHandler()
170     StringPool fStringPool = null; // supplied to the constructor and to reset()
171     XMLErrorReporter fErrorReporter = null; // target of the reportFatalXMLError() helpers
172     XMLEntityHandler fEntityHandler = null;
173     XMLEntityHandler.EntityReader fEntityReader = null; // current reader; replaced by readerChange()
174     XMLEntityHandler.CharBuffer fLiteralData = null; // scanAttValue() appends attribute value text here
175     boolean fSeenRootElement = false;
176     boolean fSeenDoctypeDecl = false;
177     boolean fStandalone = false;
178     boolean fParseTextDecl = false;
179     boolean fScanningDTD = false; // gates the forwarding of reader events to fDTDScanner
180     int fScannerState = SCANNER_STATE_XML_DECL;
181     int fReaderId = -1; // id of the current reader; replaced by readerChange()
182     int fAttValueReader = -1; // reader id recorded by scanAttValue(); the closing quote only ends the value in this reader
183     int fAttValueElementType = -1; // element rawname recorded by scanAttValue()
184     int fAttValueAttrName = -1; // attribute rawname recorded by scanAttValue()
185     int fAttValueOffset = -1; // current reader offset while an attribute value is being scanned
186     int fAttValueMark = -1; // start of the span not yet copied to fLiteralData while an attribute value is being scanned
187     int fScannerMarkupDepth = 0; // reset to 0 by reset(); decremented by skipPastEndOfCurrentMarkup()
188 
189     //
190     // Interfaces
191     //
192 
193     /**
194      * This interface must be implemented by the users of the XMLDocumentScanner class.
195      * These methods form the abstraction between the implementation semantics and the
196      * more generic task of scanning the XML non-DTD grammar.
197      */
198     public interface EventHandler {
199         /**
200          * Signal standalone = "yes"
201          *
202          * @exception java.lang.Exception
203          */
204         public void callStandaloneIsYes() throws Exception;
205 
206         /**
207          * Signal the start of a document
208          *
209          * @exception java.lang.Exception
210          */
211         public void callStartDocument() throws Exception;
212         /**
213          * Signal the end of a document
214          *
215          * @exception java.lang.Exception
216          */
217         public void callEndDocument() throws Exception;
218         /**
219          * Signal the XML declaration of a document
220          *
221          * @param version the handle in the string pool for the version number
222          * @param encoding the handle in the string pool for the encoding
223          * @param standalone the handle in the string pool for the standalone value
224          * @exception java.lang.Exception
225          */
226         public void callXMLDecl(int version, int encoding, int standalone) throws Exception;
227         /**
228          * Signal the Text declaration of an external entity.
229          *
230          * @param version the handle in the string pool for the version number
231          * @param encoding the handle in the string pool for the encoding
232          * @exception java.lang.Exception
233          */
234         public void callTextDecl(int version, int encoding) throws Exception;
235         /**
236          * Signal the scanning of a start element tag
237          * 
238          * @param element Element name scanned.
239          * @exception java.lang.Exception
240          */
241         public void callStartElement(QName element) throws Exception;
242         /**
243          * Signal the scanning of an element name in a start element tag.
244          *
245          * @param element Element name scanned.
246          */
247         public void element(QName element) throws Exception;
248         /**
249          * Signal the scanning of an attribute associated to the previous
250          * start element tag.
251          * NOTE(review): the meaning of the boolean return value is not stated here - confirm with the implementers.
252          * @param element Element name scanned.
253          * @param attrName Attribute name scanned.
254          * @param attrValue The string pool index of the attribute value.
255          */
256         public boolean attribute(QName element, QName attrName, int attrValue) throws Exception;
257         /**
258          * Signal the scanning of an end element tag
259          *
260          * @param readerId the Id of the reader being used to scan the end tag.
261          * @exception java.lang.Exception
262          */
263         public void callEndElement(int readerId) throws Exception;
264         /**
265          * Signal the start of a CDATA section
266          * @exception java.lang.Exception
267          */
268         public void callStartCDATA() throws Exception;
269         /**
270          * Signal the end of a CDATA section
271          * @exception java.lang.Exception
272          */
273         public void callEndCDATA() throws Exception;
274         /**
275          * Report the scanning of character data
276          *
277          * @param ch the handle in the string pool of the character data that was scanned
278          * @exception java.lang.Exception
279          */
280         public void callCharacters(int ch) throws Exception;
281         /**
282          * Report the scanning of a processing instruction
283          *
284          * @param piTarget the handle in the string pool of the processing instruction target
285          * @param piData the handle in the string pool of the processing instruction data
286          * @exception java.lang.Exception
287          */
288         public void callProcessingInstruction(int piTarget, int piData) throws Exception;
289         /**
290          * Report the scanning of a comment
291          *
292          * @param data the handle in the string pool of the comment text
293          * @exception java.lang.Exception
294          */
295         public void callComment(int data) throws Exception;
296     }
297 
298     /**
299      * Constructor: stores the four collaborators, installs an XMLDeclDispatcher as the initial dispatcher and creates the attribute list from the string pool.
300      */
301     public XMLDocumentScanner(StringPool stringPool,
302                               XMLErrorReporter errorReporter,
303                               XMLEntityHandler entityHandler,
304                               XMLEntityHandler.CharBuffer literalData) {
305         fStringPool = stringPool;
306         fErrorReporter = errorReporter;
307         fEntityHandler = entityHandler;
308         fLiteralData = literalData;
309         fDispatcher = new XMLDeclDispatcher(); // same initial dispatcher that reset() installs
310         fAttrList = new XMLAttrList(fStringPool);
311     }
312 
313     /**
314      * Set the event handler; the reference is stored in fEventHandler.
315      *
316      * @param eventHandler The place to send our callbacks.
317      */
318     public void setEventHandler(XMLDocumentScanner.EventHandler eventHandler) {
319         fEventHandler = eventHandler;
320     }
321 
322     /** Set the DTD handler; the reference is stored in fDTDHandler. */
323     public void setDTDHandler(XMLDocumentHandler.DTDHandler dtdHandler) {
324         fDTDHandler = dtdHandler;
325     }
326 
327     /** Sets the grammar resolver; the reference is stored in fGrammarResolver. */
328     public void setGrammarResolver(GrammarResolver resolver) {
329         fGrammarResolver = resolver;
330     }
331 
332     /**
333      * Reset the scanner so that the instance can be reused: the flags below, the dispatcher, the scanner state and the markup depth return to their initial values and a new attribute list is created.
334      * @param stringPool the string pool instance to be used by the reset scanner
335      * @param literalData the character buffer to be used for literal data by the reset scanner
336      */
337     public void reset(StringPool stringPool, XMLEntityHandler.CharBuffer literalData) {
338         fStringPool = stringPool;
339         fLiteralData = literalData;
340         fParseTextDecl = false;
341         fSeenRootElement = false;
342         fSeenDoctypeDecl = false;
343         fStandalone = false;
344         fScanningDTD = false;
345         fDispatcher = new XMLDeclDispatcher(); // same initial dispatcher as in the constructor
346         fScannerState = SCANNER_STATE_XML_DECL;
347         fScannerMarkupDepth = 0;
348         fAttrList = new XMLAttrList(fStringPool); // built from the string pool just assigned above
349     }
350 
351     //
352     // From the standard:
353     //
354     // [1] document ::= prolog element Misc*
355     //
356     // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
357     // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
358     // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
359     //
360     // The beginning of XMLDecl simplifies to:
361     //    '<?xml' S ...
362     //
363     // [27] Misc ::= Comment | PI |  S
364     // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
365     // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
366     // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
367     //
368     // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
369     //                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
370     //
371     /**
372      * Entry point for parsing: calls dispatch on the current dispatcher, repeatedly when doItAll is true.
373      *
374      * @param doItAll if true the entire document is parsed otherwise just the next segment of the document is parsed
375      * @return false as soon as a dispatch call returns false; true otherwise
376      */
377     public boolean parseSome(boolean doItAll) throws Exception
378     {
379         do {
380             if (!fDispatcher.dispatch(doItAll)) // fDispatcher is re-read on every iteration
381                 return false;
382         } while (doItAll);
383         return true;
384     }
385 
386     /**
387      * Change readers; the change is also forwarded to the DTD scanner when one is set and the DTD is being scanned.
388      *
389      * @param nextReader the new reader that the scanner will use
390      * @param nextReaderId id of the reader to change to
391      * @exception java.lang.Exception
392      */
393     public void readerChange(XMLEntityHandler.EntityReader nextReader, int nextReaderId) throws Exception {
394         fEntityReader = nextReader;
395         fReaderId = nextReaderId;
396         if (fScannerState == SCANNER_STATE_ATTRIBUTE_VALUE) { // scanAttValue() is in progress: restart its offset and mark in the new reader
397             fAttValueOffset = fEntityReader.currentOffset();
398             fAttValueMark = fAttValueOffset;
399         }
400 
401         //also propagate the change to DTDScanner if there is one
402         if (fDTDScanner != null && fScanningDTD)
403             fDTDScanner.readerChange(nextReader, nextReaderId);
404     }
405 
406     /**
407      * Handle the end of input: notifies the DTD scanner first (when one is set and the DTD is being scanned), then the current dispatcher.
408      *
409      * @param entityName the handle in the string pool of the name of the entity which has reached end of input
410      * @param moreToFollow if true, there is still input left to process in other readers
411      * @exception java.lang.Exception
412      */
413     public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
414         if (fDTDScanner != null && fScanningDTD){
415             fDTDScanner.endOfInput(entityName, moreToFollow);
416         }
417         fDispatcher.endOfInput(entityName, moreToFollow); // always called, whether or not the DTD scanner was notified
418     }
419 
420     /** 
421      * Tell if scanner has reached end of input, i.e. whether fScannerState equals SCANNER_STATE_END_OF_INPUT.
422      * @return true if scanner has reached end of input.
423      */
424     public boolean atEndOfInput() {
425         return fScannerState == SCANNER_STATE_END_OF_INPUT;
426     }
427 
428     //
429     // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
430     //
431     /**
432      * Scan a quoted attribute value. The entity reader's own scanAttValue is tried first; when it returns a negative value the scanner processes the value itself, assembling it in fLiteralData.
433      *
434      * @param element name of the element whose attribute value is being scanned (its rawname is used in error reports)
435      * @param attribute name of the attribute being scanned (its rawname is used in error reports)
436      * @param asSymbol passed to the entity reader; controls whether the value is a string (duplicates allowed) or a symbol (duplicates not allowed)
437      * @return handle in the string pool of the scanned value, or -1 if the opening quote is missing or the scanner state becomes SCANNER_STATE_END_OF_INPUT while scanning
438      * @exception java.lang.Exception
439      */
440     public int scanAttValue(QName element, QName attribute, boolean asSymbol) throws Exception {
441         boolean single;
442         if (!(single = fEntityReader.lookingAtChar('\'', true)) && !fEntityReader.lookingAtChar('\"', true)) { // accept either quote; 'single' records whether it was the apostrophe
443             reportFatalXMLError(XMLMessages.MSG_QUOTE_REQUIRED_IN_ATTVALUE,
444                                 XMLMessages.P10_QUOTE_REQUIRED,
445                                 element.rawname,
446                                 attribute.rawname);
447             return -1;
448         }
449         char qchar = single ? '\'' : '\"';
450         fAttValueMark = fEntityReader.currentOffset(); // offset taken right after the opening quote was accepted
451         int attValue = fEntityReader.scanAttValue(qchar, asSymbol); // first attempt: let the reader scan the value
452         if (attValue >= 0)
453             return attValue;
454         int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_VALUE); // remembered so the state can be restored after the loop
455         fAttValueReader = fReaderId; // the closing quote is only honoured in this reader (see the loop below)
456         // REVISIT: What should this be?
457         fAttValueElementType = element.rawname;
458         // REVISIT: What should this be?
459         fAttValueAttrName = attribute.rawname;
460         fAttValueOffset = fEntityReader.currentOffset();
461         int dataOffset = fLiteralData.length(); // where this value starts inside fLiteralData
462         if (fAttValueOffset - fAttValueMark > 0) // copy the span between the mark and the current offset
463             fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
464         fAttValueMark = fAttValueOffset;
465         boolean setMark = false; // set by branches that flushed the pending span; makes the mark catch up at the end of the iteration
466         boolean skippedCR;
467         while (true) {
468             if (fEntityReader.lookingAtChar(qchar, true)) {
469                 if (fReaderId == fAttValueReader) // a quote seen while reading from another reader does not end the value
470                     break;
471             } else if (fEntityReader.lookingAtChar(' ', true)) {
472                 //
473                 // no action required
474                 //
475             } else if ((skippedCR = fEntityReader.lookingAtChar((char)0x0D, true)) || fEntityReader.lookingAtSpace(true)) { // CR or another space character: flush the pending span, then append a single ' '
476                 if (fAttValueOffset - fAttValueMark > 0)
477                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
478                 setMark = true;
479                 fLiteralData.append(' ');
480                 if (skippedCR) {
481                     //
482                     // REVISIT - HACK !!!  code changed to pass incorrect OASIS test 'valid-sa-110'
483                     //  Uncomment the next line to conform to the spec...
484                     //
485                     //fEntityReader.lookingAtChar((char)0x0A, true);
486                 }
487             } else if (fEntityReader.lookingAtChar('&', true)) { // reference: flush the pending span before handling it
488                 if (fAttValueOffset - fAttValueMark > 0)
489                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
490                 setMark = true;
491                 //
492                 // Check for character reference first.
493                 //
494                 if (fEntityReader.lookingAtChar('#', true)) {
495                     int ch = scanCharRef();
496                     if (ch != -1) {
497                         if (ch < 0x10000)
498                             fLiteralData.append((char)ch);
499                         else {
500                             fLiteralData.append((char)(((ch-0x00010000)>>10)+0xd800)); // high surrogate of the UTF-16 pair
501                             fLiteralData.append((char)(((ch-0x00010000)&0x3ff)+0xdc00)); // low surrogate of the UTF-16 pair
502                         }
503                     }
504                 } else {
505                     //
506                     // Entity reference
507                     //
508                     int nameOffset = fEntityReader.currentOffset();
509                     fEntityReader.skipPastName(';');
510                     int nameLength = fEntityReader.currentOffset() - nameOffset;
511                     if (nameLength == 0) {
512                         reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
513                                             XMLMessages.P68_NAME_REQUIRED);
514                     } else if (!fEntityReader.lookingAtChar(';', true)) {
515                         reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
516                                             XMLMessages.P68_SEMICOLON_REQUIRED,
517                                             fEntityReader.addString(nameOffset, nameLength));
518                     } else {
519                         int entityName = fEntityReader.addSymbol(nameOffset, nameLength);
520                         fEntityHandler.startReadingFromEntity(entityName, fScannerMarkupDepth, XMLEntityHandler.ENTITYREF_IN_ATTVALUE); // NOTE(review): presumably this leads to a readerChange() callback - confirm in XMLEntityHandler
521                     }
522                 }
523             } else if (fEntityReader.lookingAtChar('<', true)) { // '<' is reported as a fatal error; the loop then continues
524                 if (fAttValueOffset - fAttValueMark > 0)
525                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
526                 setMark = true;
527                 reportFatalXMLError(XMLMessages.MSG_LESSTHAN_IN_ATTVALUE,
528                                     XMLMessages.WFC_NO_LESSTHAN_IN_ATTVALUE,
529                                     element.rawname,
530                                     attribute.rawname);
531             } else if (!fEntityReader.lookingAtValidChar(true)) { // a valid character is consumed by the call and stays in the pending span
532                 if (fAttValueOffset - fAttValueMark > 0)
533                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
534                 setMark = true;
535                 int invChar = fEntityReader.scanInvalidChar();
536                 if (fScannerState == SCANNER_STATE_END_OF_INPUT) // give up without restoring the previous state
537                     return -1;
538                 if (invChar >= 0) {
539                     reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_ATTVALUE,
540                                         XMLMessages.P10_INVALID_CHARACTER,
541                                         fStringPool.toString(element.rawname),
542                                         fStringPool.toString(attribute.rawname),
543                                         Integer.toHexString(invChar));
544                 }
545             }
546             fAttValueOffset = fEntityReader.currentOffset();
547             if (setMark) { // the pending span was flushed in this iteration: start a new span at the current offset
548                 fAttValueMark = fAttValueOffset;
549                 setMark = false;
550             }
551         }
552         restoreScannerState(previousState);
553         int dataLength = fLiteralData.length() - dataOffset;
554         if (dataLength == 0) { // nothing was appended to fLiteralData: take the value from the reader span between mark and offset
555             return fEntityReader.addString(fAttValueMark, fAttValueOffset - fAttValueMark);
556         }
557         if (fAttValueOffset - fAttValueMark > 0) { // flush the final pending span before creating the string
558             fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
559             dataLength = fLiteralData.length() - dataOffset;
560         }
561         int value = fLiteralData.addString(dataOffset, dataLength);
562         return value;
563     }
564 
565     /**
566      * Check the value of an XML Language attribute and report a fatal error (MSG_XML_LANG_INVALID) when it is rejected. As implemented, a value is accepted when it is two ASCII letters optionally followed by '-'-separated subtags of ASCII letters, or 'i'/'x' (either case) followed by at least one such subtag.
567      * @param langValue the handle in the string pool of the value to be checked
568      * @exception java.lang.Exception
569      */
570     public void checkXMLLangAttributeValue(int langValue) throws Exception {
571         String lang = fStringPool.toString(langValue);
572         int offset = -1; // -1 means "invalid"; otherwise the index up to which the value has been accepted
573         if (lang.length() >= 2) {
574             char ch0 = lang.charAt(0);
575             if (lang.charAt(1) == '-') { // one-letter prefix: only i/I/x/X are accepted
576                 if (ch0 == 'i' || ch0 == 'I' || ch0 == 'x' || ch0 == 'X') {
577                     offset = 1;
578                 }
579             } else { // otherwise the first two characters must both be ASCII letters
580                 char ch1 = lang.charAt(1);
581                 if (((ch0 >= 'a' && ch0 <= 'z') || (ch0 >= 'A' && ch0 <= 'Z')) &&
582                     ((ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z'))) {
583                         offset = 2;
584                 }
585             }
586         }
587         if (offset > 0 && lang.length() > offset) { // characters remain after the prefix: they must form '-' separated subtags
588             char ch = lang.charAt(offset++);
589             if (ch != '-') {
590                 offset = -1;
591             } else {
592                 while (true) {
593                     if (ch == '-') {
594                         if (lang.length() == offset) { // a trailing '-' is rejected
595                             offset = -1;
596                             break;
597                         }
598                         ch = lang.charAt(offset++);
599                         if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')) { // the character after '-' must be an ASCII letter
600                             offset = -1;
601                             break;
602                         }
603                         if (lang.length() == offset)
604                             break;
605                     } else if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')) { // inside a subtag only ASCII letters are accepted
606                         offset = -1;
607                         break;
608                     } else if (lang.length() == offset)
609                         break;
610                     ch = lang.charAt(offset++);
611                 }
612             }
613         }
614         if (offset == -1) {
615             reportFatalXMLError(XMLMessages.MSG_XML_LANG_INVALID,
616                                 XMLMessages.P33_INVALID,
617                                 lang);
618         }
619     }
620 
621     //
622     // Fatal-error helpers: each overload passes its message arguments (if any) as an Object[] to fErrorReporter.reportError, together with the reporter's locator, XMLMessages.XML_DOMAIN and ERRORTYPE_FATAL_ERROR.
623     //
624     void reportFatalXMLError(int majorCode, int minorCode) throws Exception { // no message arguments: null is passed
625         fErrorReporter.reportError(fErrorReporter.getLocator(),
626                                    XMLMessages.XML_DOMAIN,
627                                    majorCode,
628                                    minorCode,
629                                    null,
630                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
631     }
632     void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1) throws Exception { // one argument given as a string pool handle
633         Object[] args = { fStringPool.toString(stringIndex1) };
634         fErrorReporter.reportError(fErrorReporter.getLocator(),
635                                    XMLMessages.XML_DOMAIN,
636                                    majorCode,
637                                    minorCode,
638                                    args,
639                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
640     }
641     void reportFatalXMLError(int majorCode, int minorCode, String string1) throws Exception { // one argument given as a String
642         Object[] args = { string1 };
643         fErrorReporter.reportError(fErrorReporter.getLocator(),
644                                    XMLMessages.XML_DOMAIN,
645                                    majorCode,
646                                    minorCode,
647                                    args,
648                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
649     }
650     void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception { // two arguments given as string pool handles
651         Object[] args = { fStringPool.toString(stringIndex1),
652                           fStringPool.toString(stringIndex2) };
653         fErrorReporter.reportError(fErrorReporter.getLocator(),
654                                    XMLMessages.XML_DOMAIN,
655                                    majorCode,
656                                    minorCode,
657                                    args,
658                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
659     }
660     void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2) throws Exception { // two arguments given as Strings
661         Object[] args = { string1, string2 };
662         fErrorReporter.reportError(fErrorReporter.getLocator(),
663                                    XMLMessages.XML_DOMAIN,
664                                    majorCode,
665                                    minorCode,
666                                    args,
667                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
668     }
669     void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2, String string3) throws Exception { // three arguments given as Strings
670         Object[] args = { string1, string2, string3 };
671         fErrorReporter.reportError(fErrorReporter.getLocator(),
672                                    XMLMessages.XML_DOMAIN,
673                                    majorCode,
674                                    minorCode,
675                                    args,
676                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
677     }
678     void abortMarkup(int majorCode, int minorCode) throws Exception { // each abortMarkup overload reports a fatal error, then skips past the end of the current markup
679         reportFatalXMLError(majorCode, minorCode);
680         skipPastEndOfCurrentMarkup();
681     }
682     void abortMarkup(int majorCode, int minorCode, int stringIndex1) throws Exception { // variant with one string pool handle as message argument
683         reportFatalXMLError(majorCode, minorCode, stringIndex1);
684         skipPastEndOfCurrentMarkup();
685     }
686     void abortMarkup(int majorCode, int minorCode, String string1) throws Exception { // variant with one String as message argument
687         reportFatalXMLError(majorCode, minorCode, string1);
688         skipPastEndOfCurrentMarkup();
689     }
690     void abortMarkup(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception { // variant with two string pool handles as message arguments
691         reportFatalXMLError(majorCode, minorCode, stringIndex1, stringIndex2);
692         skipPastEndOfCurrentMarkup();
693     }
694     void skipPastEndOfCurrentMarkup() throws Exception {
695         fEntityReader.skipToChar('>');
696         if (fEntityReader.lookingAtChar('>', true))
697             fScannerMarkupDepth--;
698     }
699     //
700     //
701     //
702     int setScannerState(int state) {
703         int oldState = fScannerState;
704         fScannerState = state;
705         return oldState;
706     }
707     void restoreScannerState(int state) {
708         if (fScannerState != SCANNER_STATE_END_OF_INPUT)
709             fScannerState = state;
710     }
711     //
712     //
713     //
/**
 * One stage of the scanner's main loop.  The main loop is implemented by
 * calling the dispatch method of the current ScannerDispatcher with a flag
 * which tells the dispatcher whether to continue or return.  The scanner
 * logic is split up into dispatchers for the various syntactic components
 * of XML.  //REVISIT more rationale needed
 */
interface ScannerDispatcher {
    /**
     * Scans an XML syntactic component.
     *
     * @param keepgoing if true continue on to the next dispatcher, otherwise return
     * @return true if scanning was successful //REVISIT - does it ever return false or does it just throw?
     * @exception java.lang.Exception
     */
    boolean dispatch(boolean keepgoing) throws Exception;

    /**
     * Encapsulates the end-of-entity handling for this dispatcher.
     *
     * @param entityName StringPool handle of the entity that has reached the end
     * @param moreToFollow true if there is more input to be read
     * @exception java.lang.Exception
     */
    void endOfInput(int entityName, boolean moreToFollow) throws Exception;
}
738     final class XMLDeclDispatcher implements ScannerDispatcher {
739         public boolean dispatch(boolean keepgoing) throws Exception {
740             fEventHandler.callStartDocument();
741             if (fEntityReader.lookingAtChar('<', true)) {
742                 fScannerMarkupDepth++;
743                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
744                 if (fEntityReader.lookingAtChar('?', true)) {
745                     int piTarget = fEntityReader.scanName(' ');
746                     if (piTarget == -1) {
747                         abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
748                                     XMLMessages.P16_PITARGET_REQUIRED);
749                     } else if ("xml".equals(fStringPool.toString(piTarget))) {
750                         if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
751                             scanXMLDeclOrTextDecl(false);
752                         } else { // a PI target matching 'xml'
753                             abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
754                                         XMLMessages.P17_RESERVED_PITARGET);
755                         }
756                     } else { // PI
757                       scanPI(piTarget);
758                     }
759                     fDispatcher = new PrologDispatcher();
760                     restoreScannerState(SCANNER_STATE_PROLOG);
761                     return true;
762                 }
763                 if (fEntityReader.lookingAtChar('!', true)) {
764                     if (fEntityReader.lookingAtChar('-', true)) { // comment ?
765                         if (fEntityReader.lookingAtChar('-', true)) {
766                             scanComment(); // scan through the closing '-->'
767                         } else {
768                             abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
769                                         XMLMessages.P22_NOT_RECOGNIZED);
770                         }
771                     } else {
772                         if (fEntityReader.skippedString(doctype_string)) {
773                             setScannerState(SCANNER_STATE_DOCTYPE);
774                             fSeenDoctypeDecl = true;
775                             scanDoctypeDecl(fStandalone); // scan through the closing '>'
776                             fScannerMarkupDepth--;
777                             fDispatcher = new PrologDispatcher();
778                             restoreScannerState(SCANNER_STATE_PROLOG);
779                             return true;
780                         } else {
781                             abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
782                                         XMLMessages.P22_NOT_RECOGNIZED);
783                         }
784                     }
785                 } else {
786                     fDispatcher = new ContentDispatcher();
787                     restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
788                     return true;
789                 }
790             } else {
791                 if (fEntityReader.lookingAtSpace(true)) {
792                     fEntityReader.skipPastSpaces();
793                 } else if (!fEntityReader.lookingAtValidChar(false)) {
794                     int invChar = fEntityReader.scanInvalidChar();
795                     if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
796                         if (invChar >= 0) {
797                             String arg = Integer.toHexString(invChar);
798                             reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
799                                                 XMLMessages.P22_INVALID_CHARACTER,
800                                                 arg);
801                         }
802                     }
803                 } else {
804                     reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
805                                         XMLMessages.P22_NOT_RECOGNIZED);
806                     fEntityReader.lookingAtValidChar(true);
807                 }
808             }
809             fDispatcher = new PrologDispatcher();
810             restoreScannerState(SCANNER_STATE_PROLOG);
811             return true;
812         }
813         public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
814             switch (fScannerState) {
815             case SCANNER_STATE_XML_DECL:
816             case SCANNER_STATE_START_OF_MARKUP:
817             case SCANNER_STATE_DOCTYPE:
818                 break;
819             case SCANNER_STATE_COMMENT:
820                 if (!moreToFollow) {
821                     reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
822                                         XMLMessages.P15_UNTERMINATED);
823                 } else {
824                     reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
825                                         XMLMessages.P78_NOT_WELLFORMED);
826                 }
827                 break;
828             case SCANNER_STATE_PI:
829                 if (!moreToFollow) {
830                     reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
831                                         XMLMessages.P16_UNTERMINATED);
832                 } else {
833                     reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
834                                         XMLMessages.P78_NOT_WELLFORMED);
835                 }
836                 break;
837             default:
838                 throw new RuntimeException("FWK001 1] ScannerState="+fScannerState+"\n" + "1\t"+fScannerState);
839             }
840             if (!moreToFollow) {
841                 reportFatalXMLError(XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
842                                     XMLMessages.P1_ELEMENT_REQUIRED);
843                 fDispatcher = new EndOfInputDispatcher();
844                 setScannerState(SCANNER_STATE_END_OF_INPUT);
845             }
846         }
847     }
848     final class PrologDispatcher implements ScannerDispatcher {
849         public boolean dispatch(boolean keepgoing) throws Exception {
850             do {
851                 if (fEntityReader.lookingAtChar('<', true)) {
852                     fScannerMarkupDepth++;
853                     setScannerState(SCANNER_STATE_START_OF_MARKUP);
854                     if (fEntityReader.lookingAtChar('?', true)) {
855                         int piTarget = fEntityReader.scanName(' ');
856                         if (piTarget == -1) {
857                             abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
858                                         XMLMessages.P16_PITARGET_REQUIRED);
859                         } else if ("xml".equals(fStringPool.toString(piTarget))) {
860                             if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
861                                 abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
862                                             XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
863                             } else { // a PI target matching 'xml'
864                                 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
865                                             XMLMessages.P17_RESERVED_PITARGET);
866                             }
867                         } else { // PI
868                             scanPI(piTarget);
869                         }
870                     } else if (fEntityReader.lookingAtChar('!', true)) {
871                         if (fEntityReader.lookingAtChar('-', true)) { // comment ?
872                             if (fEntityReader.lookingAtChar('-', true)) {
873                                 scanComment(); // scan through the closing '-->'
874                             } else {
875                                 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
876                                             XMLMessages.P22_NOT_RECOGNIZED);
877                             }
878                         } else {
879                             if (!fSeenDoctypeDecl && fEntityReader.skippedString(doctype_string)) {
880                                 setScannerState(SCANNER_STATE_DOCTYPE);
881                                 fSeenDoctypeDecl = true;
882                                 scanDoctypeDecl(fStandalone); // scan through the closing '>'
883                                 fScannerMarkupDepth--;
884                             } else {
885                                 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
886                                             XMLMessages.P22_NOT_RECOGNIZED);
887                             }
888                         }
889                     } else {
890                         fDispatcher = new ContentDispatcher();
891                         restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
892                         return true;
893                     }
894                     restoreScannerState(SCANNER_STATE_PROLOG);
895                 } else if (fEntityReader.lookingAtSpace(true)) {
896                     fEntityReader.skipPastSpaces();
897                 } else if (!fEntityReader.lookingAtValidChar(false)) {
898                     int invChar = fEntityReader.scanInvalidChar();
899                     if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
900                         if (invChar >= 0) {
901                             String arg = Integer.toHexString(invChar);
902                             reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
903                                                 XMLMessages.P22_INVALID_CHARACTER,
904                                                 arg);
905                         }
906                     }
907                 } else {
908                     reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
909                                         XMLMessages.P22_NOT_RECOGNIZED);
910                     fEntityReader.lookingAtValidChar(true);
911                 }
912             } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
913             return true;
914         }
915         public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
916             switch (fScannerState) {
917             case SCANNER_STATE_PROLOG:
918             case SCANNER_STATE_START_OF_MARKUP:
919             case SCANNER_STATE_DOCTYPE:
920                 break;
921             case SCANNER_STATE_COMMENT:
922                 if (!moreToFollow) {
923                     reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
924                                         XMLMessages.P15_UNTERMINATED);
925                 } else {
926                     reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
927                                         XMLMessages.P78_NOT_WELLFORMED);
928                 }
929                 break;
930             case SCANNER_STATE_PI:
931                 if (!moreToFollow) {
932                     reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
933                                         XMLMessages.P16_UNTERMINATED);
934                 } else {
935                     reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
936                                         XMLMessages.P78_NOT_WELLFORMED);
937                 }
938                 break;
939             default:
940                 throw new RuntimeException("FWK001 2] ScannerState="+fScannerState+"\n" + "2\t"+fScannerState);
941             }
942             if (!moreToFollow) {
943                 reportFatalXMLError(XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
944                                     XMLMessages.P1_ELEMENT_REQUIRED);
945                 fDispatcher = new EndOfInputDispatcher();
946                 setScannerState(SCANNER_STATE_END_OF_INPUT);
947             }
948         }
949     }
950     int fCurrentElementType = -1;
951     public int getCurrentElementType() {
952         return fCurrentElementType;
953     }
954     final class ContentDispatcher implements ScannerDispatcher {
955         private int fContentReader = -1;
956         private int fElementDepth = 0;
957         private int[] fElementTypeStack = new int[8];
958 
959         void popElementType() {
960             if (fElementDepth-- == 0) {
961                 throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
962             }
963             if (fElementDepth == 0) {
964                 fCurrentElementType = - 1;
965             } else {
966                 fCurrentElementType = fElementTypeStack[fElementDepth - 1];
967             }
968         }
969 
970         public boolean dispatch(boolean keepgoing) throws Exception {
971             do {
972                 switch (fScannerState) {
973                 case SCANNER_STATE_ROOT_ELEMENT:
974                 {
975                     scanElementType(fEntityReader, '>', fElementQName);
976                     if (fElementQName.rawname != -1) {
977                         //
978                         // root element
979                         //
980                         fContentReader = fReaderId;
981                         fSeenRootElement = true;
982                         //
983                         // scan element
984                         //
985                         if (fEntityReader.lookingAtChar('>', true)) {
986                             //
987                             // we have more content
988                             //
989                             fEventHandler.callStartElement(fElementQName);
990                             fScannerMarkupDepth--;
991                             if (fElementDepth == fElementTypeStack.length) {
992                                 int[] newStack = new int[fElementDepth * 2];
993                                 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
994                                 fElementTypeStack = newStack;
995                             }
996                             fCurrentElementType = fElementQName.rawname;
997                             fElementTypeStack[fElementDepth] = fElementQName.rawname;
998                             fElementDepth++;
999                             restoreScannerState(SCANNER_STATE_CONTENT);
1000                        } else if (scanElement(fElementQName)) {
1001                            //
1002                            // we have more content
1003                            //
1004                            if (fElementDepth == fElementTypeStack.length) {
1005                                int[] newStack = new int[fElementDepth * 2];
1006                                System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1007                                fElementTypeStack = newStack;
1008                            }
1009                            fCurrentElementType = fElementQName.rawname;
1010                            fElementTypeStack[fElementDepth] = fElementQName.rawname;
1011                            fElementDepth++;
1012                            restoreScannerState(SCANNER_STATE_CONTENT);
1013                        } else {
1014                            fDispatcher = new TrailingMiscDispatcher();
1015                            restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1016                            return true;
1017                        }
1018                    } else {
1019                        reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
1020                                            XMLMessages.P22_NOT_RECOGNIZED);
1021                        fDispatcher = new PrologDispatcher();
1022                        restoreScannerState(SCANNER_STATE_PROLOG);
1023                        return true;
1024                    }
1025                    break;
1026                }
1027                case SCANNER_STATE_START_OF_MARKUP:
1028                    if (fEntityReader.lookingAtChar('?', true)) {
1029                        int piTarget = fEntityReader.scanName(' ');
1030                        if (piTarget == -1) {
1031                            abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1032                                        XMLMessages.P16_PITARGET_REQUIRED);
1033                        } else if ("xml".equals(fStringPool.toString(piTarget))) {
1034                            if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1035                                if (fParseTextDecl) {
1036                                    scanXMLDeclOrTextDecl(true);
1037                                    fParseTextDecl = false;
1038                                } else {
1039                                    abortMarkup(XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1040                                                XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1041                                }
1042                            } else { // a PI target matching 'xml'
1043                                abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1044                                            XMLMessages.P17_RESERVED_PITARGET);
1045                            }
1046                        } else { // PI
1047                            scanPI(piTarget);
1048                        }
1049                        restoreScannerState(SCANNER_STATE_CONTENT);
1050                    } else if (fEntityReader.lookingAtChar('!', true)) {
1051                        if (fEntityReader.lookingAtChar('-', true)) { // comment ?
1052                            if (fEntityReader.lookingAtChar('-', true)) {
1053                                scanComment(); // scan through the closing '-->'
1054                            } else {
1055                                abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1056                                            XMLMessages.P43_NOT_RECOGNIZED);
1057                            }
1058                        } else {
1059                            if (fEntityReader.skippedString(cdata_string)) {
1060                                fEntityReader.setInCDSect(true);
1061                                fEventHandler.callStartCDATA();
1062                            } else {
1063                                abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1064                                            XMLMessages.P43_NOT_RECOGNIZED);
1065                            }
1066                        }
1067                    } else {
1068                        if (fEntityReader.lookingAtChar('/', true)) {
1069                            //
1070                            // [42] ETag ::= '</' Name S? '>'
1071                            //
1072                            if (!scanExpectedElementType(fEntityReader, '>', fCurrentElementType)) {
1073                                abortMarkup(XMLMessages.MSG_ETAG_REQUIRED,
1074                                            XMLMessages.P39_UNTERMINATED,
1075                                            fCurrentElementType);
1076                            } else {
1077                                if (!fEntityReader.lookingAtChar('>', true)) {
1078                                    fEntityReader.skipPastSpaces();
1079                                    if (!fEntityReader.lookingAtChar('>', true)) {
1080                                        reportFatalXMLError(XMLMessages.MSG_ETAG_UNTERMINATED,
1081                                                            XMLMessages.P42_UNTERMINATED,
1082                                                            fCurrentElementType);
1083                                    }
1084                                }
1085                                fScannerMarkupDepth--;
1086                                fEventHandler.callEndElement(fReaderId);
1087                                if (fElementDepth-- == 0) {
1088                                    throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1089                                }
1090                                if (fElementDepth == 0) {
1091                                    fCurrentElementType = - 1;
1092                                    fDispatcher = new TrailingMiscDispatcher();
1093                                    restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1094                                    return true;
1095                                } else {
1096                                    fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1097                                }
1098                            }
1099                        } else {
1100                            scanElementType(fEntityReader, '>', fElementQName);
1101                            if (fElementQName.rawname != -1) {
1102                                //
1103                                // element
1104                                //
1105                                if (fEntityReader.lookingAtChar('>', true)) {
1106                                    fEventHandler.callStartElement(fElementQName);
1107                                    fScannerMarkupDepth--;
1108                                    if (fElementDepth == fElementTypeStack.length) {
1109                                        int[] newStack = new int[fElementDepth * 2];
1110                                        System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1111                                        fElementTypeStack = newStack;
1112                                    }
1113                                    fCurrentElementType = fElementQName.rawname;
1114                                    fElementTypeStack[fElementDepth] = fElementQName.rawname;
1115                                    fElementDepth++;
1116                                } else {
1117                                    if (scanElement(fElementQName)) {
1118                                        if (fElementDepth == fElementTypeStack.length) {
1119                                            int[] newStack = new int[fElementDepth * 2];
1120                                            System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1121                                            fElementTypeStack = newStack;
1122                                        }
1123                                        fCurrentElementType = fElementQName.rawname;
1124                                        fElementTypeStack[fElementDepth] = fElementQName.rawname;
1125                                        fElementDepth++;
1126                                    }
1127                                }
1128                            } else {
1129                                abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1130                                            XMLMessages.P43_NOT_RECOGNIZED);
1131                            }
1132                        }
1133                    }
1134                    restoreScannerState(SCANNER_STATE_CONTENT);
1135                    break;
1136                case SCANNER_STATE_CONTENT:
1137                    if (fParseTextDecl && fEntityReader.lookingAtChar('<', true)) {
1138                        fScannerMarkupDepth++;
1139                        setScannerState(SCANNER_STATE_START_OF_MARKUP);
1140                        continue;
1141                    }
1142                    // REVISIT: Is this the right thing to do? Do we need to
1143                    //          save more information on the stack?
1144                    fCurrentElementQName.setValues(-1, -1, fCurrentElementType);
1145                    switch (fEntityReader.scanContent(fCurrentElementQName)) {
1146                    case XMLEntityHandler.CONTENT_RESULT_START_OF_PI:
1147                        fScannerMarkupDepth++;
1148                        int piTarget = fEntityReader.scanName(' ');
1149                        if (piTarget == -1) {
1150                            abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1151                                        XMLMessages.P16_PITARGET_REQUIRED);
1152                        } else if ("xml".equals(fStringPool.toString(piTarget))) {
1153                            if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1154                                if (fReaderId == fContentReader) {
1155                                    abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1156                                                XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1157                                } else {
1158                                    abortMarkup(XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1159                                                XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1160                                }
1161                            } else { // a PI target matching 'xml'
1162                                abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1163                                            XMLMessages.P17_RESERVED_PITARGET);
1164                            }
1165                        } else { // PI
1166                            scanPI(piTarget);
1167                        }
1168                        break;
1169                    case XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT:
1170                        fScannerMarkupDepth++;
1171                        fParseTextDecl = false;
1172                        scanComment(); // scan through the closing '-->'
1173                        break;
1174                    case XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT:
1175                        fScannerMarkupDepth++;
1176                        fParseTextDecl = false;
1177                        fEntityReader.setInCDSect(true);
1178                        fEventHandler.callStartCDATA();
1179                        break;
1180                    case XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG:
1181                        fScannerMarkupDepth++;
1182                        fParseTextDecl = false;
1183                        //
1184                        // [42] ETag ::= '</' Name S? '>'
1185                        //
1186                        if (!scanExpectedElementType(fEntityReader, '>', fCurrentElementType)) {
1187                            abortMarkup(XMLMessages.MSG_ETAG_REQUIRED,
1188                                        XMLMessages.P39_UNTERMINATED,
1189                                        fCurrentElementType);
1190                        } else {
1191                            if (!fEntityReader.lookingAtChar('>', true)) {
1192                                fEntityReader.skipPastSpaces();
1193                                if (!fEntityReader.lookingAtChar('>', true)) {
1194                                    reportFatalXMLError(XMLMessages.MSG_ETAG_UNTERMINATED,
1195                                                        XMLMessages.P42_UNTERMINATED,
1196                                                        fCurrentElementType);
1197                                }
1198                            }
1199                            fScannerMarkupDepth--;
1200                            fEventHandler.callEndElement(fReaderId);
1201                            if (fElementDepth-- == 0) {
1202                                throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1203                            }
1204                            if (fElementDepth == 0) {
1205                                fCurrentElementType = - 1;
1206                                fDispatcher = new TrailingMiscDispatcher();
1207                                restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1208                                return true;
1209                            } else {
1210                                fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1211                            }
1212                        }
1213                        restoreScannerState(SCANNER_STATE_CONTENT);
1214                        break;
1215                    case XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT:
1216                    {
1217                        fScannerMarkupDepth++;
1218                        fParseTextDecl = false;
1219                        scanElementType(fEntityReader, '>', fElementQName);
1220                        if (fElementQName.rawname != -1) {
1221                            if (fEntityReader.lookingAtChar('>', true)) {
1222                                fEventHandler.callStartElement(fElementQName);
1223                                fScannerMarkupDepth--;
1224                                if (fElementDepth == fElementTypeStack.length) {
1225                                    int[] newStack = new int[fElementDepth * 2];
1226                                    System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1227                                    fElementTypeStack = newStack;
1228                                }
1229                                fCurrentElementType = fElementQName.rawname;
1230                                fElementTypeStack[fElementDepth] = fElementQName.rawname;
1231                                fElementDepth++;
1232                            } else {
1233                                if (scanElement(fElementQName)) {
1234                                    if (fElementDepth == fElementTypeStack.length) {
1235                                        int[] newStack = new int[fElementDepth * 2];
1236                                        System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1237                                        fElementTypeStack = newStack;
1238                                    }
1239                                    fCurrentElementType = fElementQName.rawname;
1240                                    fElementTypeStack[fElementDepth] = fElementQName.rawname;
1241                                    fElementDepth++;
1242                                }
1243                            }
1244                        } else {
1245                            abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1246                                        XMLMessages.P43_NOT_RECOGNIZED);
1247                        }
1248                        if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1249                            fScannerState = SCANNER_STATE_CONTENT;
1250                        break;
1251                    }
1252                    case XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG:
1253                    {
1254                        fParseTextDecl = false;
1255                        fEventHandler.callEndElement(fReaderId);
1256                        if (fElementDepth-- == 0) {
1257                            throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1258                        }
1259                        if (fElementDepth == 0) {
1260                            fCurrentElementType = - 1;
1261                            if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1262                                fDispatcher = new TrailingMiscDispatcher();
1263                                fScannerState = SCANNER_STATE_TRAILING_MISC;
1264                            }
1265                            return true;
1266                        } else {
1267                            fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1268                        }
1269                        if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1270                            fScannerState = SCANNER_STATE_CONTENT;
1271                        break;
1272                    }
1273                    case XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF:
1274                        fParseTextDecl = false;
1275                        //
1276                        // [67] Reference ::= EntityRef | CharRef
1277                        // [68] EntityRef ::= '&' Name ';'
1278                        // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1279                        //
1280                        setScannerState(SCANNER_STATE_REFERENCE);
1281                        int num = scanCharRef();
1282                        // if (num == -1) num = 0xfffd; // REVISIT - alternative is to use Unicode replacement char
1283                        if (num != -1)
1284                            fEventHandler.callCharacters(num);
1285                        restoreScannerState(SCANNER_STATE_CONTENT);
1286                        break;
1287                    case XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT:
1288                        // REVISIT - This should hopefully get us the "reference not
1289                        //   contained in one entity" error when endOfInput is called.
1290                        //   Test that this is so...
1291                        //
1292                        // fall through...
1293                        //
1294                    case XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF:
1295                        fParseTextDecl = false;
1296                        //
1297                        // [68] EntityRef ::= '&' Name ';'
1298                        //
1299                        setScannerState(SCANNER_STATE_REFERENCE);
1300                        int nameOffset = fEntityReader.currentOffset();
1301                        fEntityReader.skipPastName(';');
1302                        int nameLength = fEntityReader.currentOffset() - nameOffset;
1303                        if (nameLength == 0) {
1304                            reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
1305                                                XMLMessages.P68_NAME_REQUIRED);
1306                            restoreScannerState(SCANNER_STATE_CONTENT);
1307                        } else if (!fEntityReader.lookingAtChar(';', true)) {
1308                            reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
1309                                                XMLMessages.P68_SEMICOLON_REQUIRED,
1310                                                fEntityReader.addString(nameOffset, nameLength));
1311                            restoreScannerState(SCANNER_STATE_CONTENT);
1312                        } else {
1313                            restoreScannerState(SCANNER_STATE_CONTENT);
1314                            int entityName = fEntityReader.addSymbol(nameOffset, nameLength);
1315                            fParseTextDecl = fEntityHandler.startReadingFromEntity(entityName, fElementDepth, XMLEntityHandler.ENTITYREF_IN_CONTENT);
1316                        }
1317                        break;
1318                    case XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT:
1319                        fParseTextDecl = false;
1320                        //
1321                        // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1322                        // [21] CDEnd ::= ']]>'
1323                        //
1324                        if (fEntityReader.getInCDSect()) {
1325                            fEntityReader.setInCDSect(false);
1326                            fEventHandler.callEndCDATA();
1327                            fScannerMarkupDepth--;
1328                        } else {
1329                            reportFatalXMLError(XMLMessages.MSG_CDEND_IN_CONTENT,
1330                                                XMLMessages.P14_INVALID);
1331                        }
1332                        restoreScannerState(SCANNER_STATE_CONTENT);
1333                        break;
1334                    case XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR:
1335                        fParseTextDecl = false;
1336                        //
1337                        // The reader will also use this state if it
1338                        // encounters the end of input while reading
1339                        // content.  We need to check for this case.
1340                        //
1341                        if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1342                            if (!fEntityReader.lookingAtValidChar(false)) {
1343                                //
1344                                //  [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]        // any Unicode character, excluding the
1345                                //               | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
1346                                //
1347                                int invChar = fEntityReader.scanInvalidChar();
1348                                if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1349                                    if (invChar >= 0) {
1350                                        if (fEntityReader.getInCDSect()) {
1351                                            reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_CDSECT,
1352                                                                XMLMessages.P20_INVALID_CHARACTER,
1353                                                                Integer.toHexString(invChar));
1354                                        } else {
1355                                            reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_CONTENT,
1356                                                                XMLMessages.P43_INVALID_CHARACTER,
1357                                                                Integer.toHexString(invChar));
1358                                        }
1359                                    }
1360                                }
1361                            }
1362                            restoreScannerState(SCANNER_STATE_CONTENT);
1363                        }
1364                        break;
1365                    case XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED:
1366                        fParseTextDecl = false;
1367                        abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1368                                    XMLMessages.P43_NOT_RECOGNIZED);
1369                        break;
1370                    case XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT:
1371                        // REVISIT - This should hopefully get us the "markup not
1372                        //   contained in one entity" error when endOfInput is called.
1373                        //   Test that this is so...
1374                        fScannerMarkupDepth++;
1375                        fParseTextDecl = false;
1376                        fScannerState = SCANNER_STATE_START_OF_MARKUP;
1377                        break;
1378                    default:
1379                        throw new RuntimeException("FWK001 3] ScannerState="+fScannerState+"\n" + "3\t"+fScannerState); // should not happen
1380                    }
1381                    break;
1382                default:
1383                    throw new RuntimeException("FWK001 4] ScannerState="+fScannerState+"\n" + "4\t"+fScannerState);
1384                }
1385            } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
1386            return true;
1387        }
1388        public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1389            switch (fScannerState) {
1390            case SCANNER_STATE_ROOT_ELEMENT:
1391            case SCANNER_STATE_START_OF_MARKUP:
1392                break;
1393            case SCANNER_STATE_CONTENT:
1394                if (fEntityReader.getInCDSect()) {
1395                    reportFatalXMLError(XMLMessages.MSG_CDSECT_UNTERMINATED,
1396                                        XMLMessages.P18_UNTERMINATED);
1397                }
1398                break;
1399            case SCANNER_STATE_ATTRIBUTE_LIST:
1400                if (!moreToFollow) {
1401// REVISIT                    reportFatalXMLError(XMLMessages.MSG_TAG1);
1402                } else {
1403// REVISIT                    reportFatalXMLError(XMLMessages.MSG_TAG1);
1404                }
1405                break;
1406            case SCANNER_STATE_ATTRIBUTE_NAME:
1407                if (!moreToFollow) {
1408// REVISIT                    reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1409                } else {
1410// REVISIT                    reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1411                }
1412                break;
1413            case SCANNER_STATE_ATTRIBUTE_VALUE:
1414                if (!moreToFollow) {
1415                    reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_VALUE_UNTERMINATED,
1416                                        XMLMessages.P10_UNTERMINATED,
1417                                        fAttValueElementType,
1418                                        fAttValueAttrName);
1419                } else if (fReaderId == fAttValueReader) {
1420// REVISIT                        reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1421                } else {
1422                    fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
1423                }
1424                break;
1425            case SCANNER_STATE_COMMENT:
1426                if (!moreToFollow) {
1427                    reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
1428                                        XMLMessages.P15_UNTERMINATED);
1429                } else {
1430                    reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
1431                                        XMLMessages.P78_NOT_WELLFORMED);
1432                }
1433                break;
1434            case SCANNER_STATE_PI:
1435                if (!moreToFollow) {
1436                    reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
1437                                        XMLMessages.P16_UNTERMINATED);
1438                } else {
1439                    reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
1440                                        XMLMessages.P78_NOT_WELLFORMED);
1441                }
1442                break;
1443            case SCANNER_STATE_REFERENCE:
1444                if (!moreToFollow) {
1445                    reportFatalXMLError(XMLMessages.MSG_REFERENCE_UNTERMINATED,
1446                                        XMLMessages.P67_UNTERMINATED);
1447                } else {
1448                    reportFatalXMLError(XMLMessages.MSG_REFERENCE_NOT_IN_ONE_ENTITY,
1449                                        XMLMessages.P78_NOT_WELLFORMED);
1450                }
1451                break;
1452            default:
1453                throw new RuntimeException("FWK001 5] ScannerState="+fScannerState+"\n" + "5\t"+fScannerState);
1454            }
1455            if (!moreToFollow) {
1456                if (fElementDepth > 0)
1457                    reportFatalXMLError(XMLMessages.MSG_ETAG_REQUIRED,
1458                                        XMLMessages.P39_UNTERMINATED,
1459                                        fCurrentElementType);
1460                fDispatcher = new EndOfInputDispatcher();
1461                setScannerState(SCANNER_STATE_END_OF_INPUT);
1462            }
1463        }
1464    }
1465    final class TrailingMiscDispatcher implements ScannerDispatcher {
1466        public boolean dispatch(boolean keepgoing) throws Exception {
1467            do {
1468                if (fEntityReader.lookingAtChar('<', true)) {
1469                    fScannerMarkupDepth++;
1470                    setScannerState(SCANNER_STATE_START_OF_MARKUP);
1471                    if (fEntityReader.lookingAtChar('?', true)) {
1472                        int piTarget = fEntityReader.scanName(' ');
1473                        if (piTarget == -1) {
1474                            abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1475                                        XMLMessages.P16_PITARGET_REQUIRED);
1476                        } else if ("xml".equals(fStringPool.toString(piTarget))) {
1477                            if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1478                                abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1479                                            XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1480                            } else { // a PI target matching 'xml'
1481                                abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1482                                            XMLMessages.P17_RESERVED_PITARGET);
1483                            }
1484                        } else { // PI
1485                            scanPI(piTarget);
1486                        }
1487                    } else if (fEntityReader.lookingAtChar('!', true)) {
1488                        if (fEntityReader.lookingAtChar('-', true) &&
1489                            fEntityReader.lookingAtChar('-', true)) { // comment ?
1490                            scanComment(); // scan through the closing '-->'
1491                        } else {
1492                            abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1493                                        XMLMessages.P27_NOT_RECOGNIZED);
1494                        }
1495                    } else {
1496                        abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1497                                    XMLMessages.P27_NOT_RECOGNIZED);
1498                    }
1499                    restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1500                } else if (fEntityReader.lookingAtSpace(true)) {
1501                    fEntityReader.skipPastSpaces();
1502                } else if (!fEntityReader.lookingAtValidChar(false)) {
1503                    int invChar = fEntityReader.scanInvalidChar();
1504                    if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1505                        if (invChar >= 0) {
1506                            String arg = Integer.toHexString(invChar);
1507                            reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_MISC,
1508                                                XMLMessages.P27_INVALID_CHARACTER,
1509                                                arg);
1510                        }
1511                    }
1512                } else {
1513                    reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1514                                        XMLMessages.P27_NOT_RECOGNIZED);
1515                    fEntityReader.lookingAtValidChar(true);
1516                }
1517            } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
1518            return true;
1519        }
1520        public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1521            if (moreToFollow)
1522                throw new RuntimeException("FWK003 TrailingMiscDispatcher.endOfInput moreToFollow");
1523            switch (fScannerState) {
1524            case SCANNER_STATE_TRAILING_MISC:
1525            case SCANNER_STATE_START_OF_MARKUP:
1526                break;
1527            case SCANNER_STATE_COMMENT:
1528                reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
1529                                    XMLMessages.P15_UNTERMINATED);
1530                break;
1531            case SCANNER_STATE_PI:
1532                reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
1533                                    XMLMessages.P16_UNTERMINATED);
1534                break;
1535            default:
1536                throw new RuntimeException("FWK001 6] ScannerState="+fScannerState+"\n" + "6\t"+fScannerState);
1537            }
1538            fDispatcher = new EndOfInputDispatcher();
1539            setScannerState(SCANNER_STATE_END_OF_INPUT);
1540        }
1541    }
1542    final class EndOfInputDispatcher implements ScannerDispatcher {
1543        public boolean dispatch(boolean keepgoing) throws Exception {
1544            if (fScannerState != SCANNER_STATE_TERMINATED)
1545                fEventHandler.callEndDocument();
1546            setScannerState(SCANNER_STATE_TERMINATED);
1547            return false;
1548        }
1549        public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1550            throw new RuntimeException("FWK001 7] ScannerState="+fScannerState+"\n" + "7\t"+fScannerState);
1551        }
1552    }
1553    //
1554    // From the standard:
1555    //
1556    // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1557    // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1558    // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1559    // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1560    // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1561    //                 | ('"' ('yes' | 'no') '"'))
1562    //
1563    // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1564    //
1565    void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws Exception
1566    {
1567        int version = -1;
1568        int encoding = -1;
1569        int standalone = -1;
1570        final int XMLDECL_START = 0;
1571        final int XMLDECL_VERSION = 1;
1572        final int XMLDECL_ENCODING = 2;
1573        final int XMLDECL_STANDALONE = 3;
1574        final int XMLDECL_FINISHED = 4;
1575        int state = XMLDECL_START;
1576        do {
1577            fEntityReader.skipPastSpaces();
1578            int offset = fEntityReader.currentOffset();
1579            if (scanningTextDecl) {
1580                if (state == XMLDECL_START && fEntityReader.skippedString(version_string)) {
1581                    state = XMLDECL_VERSION;
1582                } else if (fEntityReader.skippedString(encoding_string)) {
1583                    state = XMLDECL_ENCODING;
1584                } else {
1585                    abortMarkup(XMLMessages.MSG_ENCODINGDECL_REQUIRED,
1586                                XMLMessages.P77_ENCODINGDECL_REQUIRED);
1587                    return;
1588                }
1589            } else {
1590                if (state == XMLDECL_START) {
1591                    if (!fEntityReader.skippedString(version_string)) {
1592                        abortMarkup(XMLMessages.MSG_VERSIONINFO_REQUIRED,
1593                                    XMLMessages.P23_VERSIONINFO_REQUIRED);
1594                        return;
1595                    }
1596                    state = XMLDECL_VERSION;
1597                } else {
1598                    if (state == XMLDECL_VERSION) {
1599                        if (fEntityReader.skippedString(encoding_string))
1600                            state = XMLDECL_ENCODING;
1601                        else
1602                            state = XMLDECL_STANDALONE;
1603                    } else
1604                        state = XMLDECL_STANDALONE;
1605                    if (state == XMLDECL_STANDALONE && !fEntityReader.skippedString(standalone_string))
1606                        break;
1607                }
1608            }
1609            int length = fEntityReader.currentOffset() - offset;
1610            fEntityReader.skipPastSpaces();
1611            if (!fEntityReader.lookingAtChar('=', true)) {
1612                int majorCode = scanningTextDecl ?
1613                                XMLMessages.MSG_EQ_REQUIRED_IN_TEXTDECL :
1614                                XMLMessages.MSG_EQ_REQUIRED_IN_XMLDECL;
1615                int minorCode = state == XMLDECL_VERSION ?
1616                                XMLMessages.P24_EQ_REQUIRED :
1617                                (state == XMLDECL_ENCODING ?
1618                                 XMLMessages.P80_EQ_REQUIRED :
1619                                 XMLMessages.P32_EQ_REQUIRED);
1620                abortMarkup(majorCode, minorCode, fEntityReader.addString(offset, length));
1621                return;
1622            }
1623            fEntityReader.skipPastSpaces();
1624            int result = fEntityReader.scanStringLiteral();
1625            switch (result) {
1626            case XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED:
1627            {
1628                int majorCode = scanningTextDecl ?
1629                                XMLMessages.MSG_QUOTE_REQUIRED_IN_TEXTDECL :
1630                                XMLMessages.MSG_QUOTE_REQUIRED_IN_XMLDECL;
1631                int minorCode = state == XMLDECL_VERSION ?
1632                                XMLMessages.P24_QUOTE_REQUIRED :
1633                                (state == XMLDECL_ENCODING ?
1634                                 XMLMessages.P80_QUOTE_REQUIRED :
1635                                 XMLMessages.P32_QUOTE_REQUIRED);
1636                abortMarkup(majorCode, minorCode, fEntityReader.addString(offset, length));
1637                return;
1638            }
1639            case XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR:
1640                int invChar = fEntityReader.scanInvalidChar();
1641                if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1642                    if (invChar >= 0) {
1643                        int majorCode = scanningTextDecl ?
1644                                        XMLMessages.MSG_INVALID_CHAR_IN_TEXTDECL :
1645                                        XMLMessages.MSG_INVALID_CHAR_IN_XMLDECL;
1646                        int minorCode = state == XMLDECL_VERSION ?
1647                                        XMLMessages.P26_INVALID_CHARACTER :
1648                                        (state == XMLDECL_ENCODING ?
1649                                         XMLMessages.P81_INVALID_CHARACTER :
1650                                         XMLMessages.P32_INVALID_CHARACTER);
1651                        reportFatalXMLError(majorCode, minorCode, Integer.toHexString(invChar));
1652                    }
1653                    skipPastEndOfCurrentMarkup();
1654                }
1655                return;
1656            default:
1657                break;
1658            }
1659            switch (state) {
1660            case XMLDECL_VERSION:
1661                //
1662                // version="..."
1663                //
1664                version = result;
1665                String versionString = fStringPool.toString(version);
1666                if (!"1.0".equals(versionString)) {
1667                    if (!validVersionNum(versionString)) {
1668                        abortMarkup(XMLMessages.MSG_VERSIONINFO_INVALID,
1669                                            XMLMessages.P26_INVALID_VALUE,
1670                                            versionString);
1671                        return;
1672                    }
1673                    // NOTE: RECOVERABLE ERROR
1674                    Object[] args = { versionString };
1675                    fErrorReporter.reportError(fErrorReporter.getLocator(),
1676                                               XMLMessages.XML_DOMAIN,
1677                                               XMLMessages.MSG_VERSION_NOT_SUPPORTED,
1678                                               XMLMessages.P26_NOT_SUPPORTED,
1679                                               args,
1680                                               XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
1681                    // REVISIT - hope it is compatible...
1682                    // skipPastEndOfCurrentMarkup();
1683                    // return;
1684                }
1685                if (!fEntityReader.lookingAtSpace(true)) {
1686                    if (scanningTextDecl) {
1687                        abortMarkup(XMLMessages.MSG_SPACE_REQUIRED_IN_TEXTDECL,
1688                                    XMLMessages.P80_WHITESPACE_REQUIRED);
1689                        return;
1690                    }
1691                    state = XMLDECL_FINISHED;
1692                }
1693                break;
1694            case XMLDECL_ENCODING:
1695                //
1696                // encoding = "..."
1697                //
1698                encoding = result;
1699                String encodingString = fStringPool.toString(encoding);
1700                if (!validEncName(encodingString)) {
1701                    abortMarkup(XMLMessages.MSG_ENCODINGDECL_INVALID,
1702                                XMLMessages.P81_INVALID_VALUE,
1703                                encodingString);
1704                    return;
1705                }
1706                if (!fEntityReader.lookingAtSpace(true)) {
1707                    state = XMLDECL_FINISHED;
1708                } else if (scanningTextDecl) {
1709                    fEntityReader.skipPastSpaces();
1710                    state = XMLDECL_FINISHED;
1711                }
1712                break;
1713            case XMLDECL_STANDALONE:
1714                //
1715                // standalone="..."
1716                //
1717                standalone = result;
1718                String standaloneString = fStringPool.toString(standalone);
1719                boolean yes = "yes".equals(standaloneString);
1720                if (!yes && !"no".equals(standaloneString)) {
1721                    abortMarkup(XMLMessages.MSG_SDDECL_INVALID,
1722                                XMLMessages.P32_INVALID_VALUE,
1723                                standaloneString);
1724                    return;
1725                }
1726                fStandalone = yes;
1727                fEntityReader.skipPastSpaces();
1728                state = XMLDECL_FINISHED;
1729                break;
1730            }
1731        } while (state != XMLDECL_FINISHED);
1732        if (!fEntityReader.lookingAtChar('?', true) || !fEntityReader.lookingAtChar('>', true)) {
1733            int majorCode, minorCode;
1734            if (scanningTextDecl) {
1735                majorCode = XMLMessages.MSG_TEXTDECL_UNTERMINATED;
1736                minorCode = XMLMessages.P77_UNTERMINATED;
1737            } else {
1738                majorCode = XMLMessages.MSG_XMLDECL_UNTERMINATED;
1739                minorCode = XMLMessages.P23_UNTERMINATED;
1740            }
1741            abortMarkup(majorCode, minorCode);
1742            return;
1743        }
1744        fScannerMarkupDepth--;
1745        if (scanningTextDecl) {
1746            fEventHandler.callTextDecl(version, encoding);
1747        } else {
1748            //
1749            // Now that we have hit '?>' we are done with XML decl. Call the
1750            // handler before returning.
1751            //
1752            fEventHandler.callXMLDecl(version, encoding, standalone);
1753            // if we see standalone = 'yes', call the eventHandler - XMLValidator
1754            if (fStandalone) {
1755                fEventHandler.callStandaloneIsYes();
1756            }
1757        }
1758    }
1759    //
1760    // From the standard:
1761    //
1762    // [39] element ::= EmptyElemTag | STag content ETag
1763    // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1764    // [40] STag ::= '<' Name (S Attribute)* S? '>'
1765    // [41] Attribute ::= Name Eq AttValue
1766    // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1767    // [67] Reference ::= EntityRef | CharRef
1768    // [68] EntityRef ::= '&' Name ';'
1769    // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1770    // [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1771    // [42] ETag ::= '</' Name S? '>'
1772    //
1773    // Note: We have already scanned Name.
1774    //
1775    boolean scanElement(QName element) throws Exception
1776    {
            //
            // Scans the remainder of a start tag or empty-element tag whose
            // name has already been consumed: the optional attribute list
            // followed by the closing '>' or '/>'.
            //
            // Returns true only when the tag was closed with a plain '>'.
            // Returns false for an empty-element tag ('/>'), in which case
            // both callStartElement and callEndElement are issued here, and
            // on every error or end-of-input path.
            //
            // Throughout, the boolean passed to lookingAtChar/lookingAtSpace
            // presumably asks the reader to consume what it matched -- the
            // reader is declared outside this chunk, confirm there.
            //
1777        //
1778        // Scan for attributes
1779        //
1780        boolean greater = false;
1781        boolean slash = false;
            // The assignment inside the condition is deliberate: 'greater'
            // records whether the closing '>' has already been seen.
1782        if (greater = fEntityReader.lookingAtChar('>', true)) {
1783            // no attributes
1784        } else if (fEntityReader.lookingAtSpace(true)) {
                // Remember the state that was active on entry so that it can
                // be put back on every path that leaves the attribute list.
1785            int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
1786            while (true) {
1787                fEntityReader.skipPastSpaces();
1788                //
1789                // [41] Attribute ::= Name Eq AttValue
1790                //
                    // Short-circuit: '/' is only tested when '>' was not found.
1791                if ((greater = fEntityReader.lookingAtChar('>', true)) || (slash = fEntityReader.lookingAtChar('/', true)))
1792                    break;
1793                //
1794                // Name
1795                //
1796                setScannerState(SCANNER_STATE_ATTRIBUTE_NAME);
1797                scanAttributeName(fEntityReader, element, fAttributeQName);
                    // rawname == -1 means no attribute name was produced. Leave
                    // the loop; with neither flag set, the terminator check
                    // after the loop handles the failure.
1798                if (fAttributeQName.rawname == -1) {
1799                    break;
1800                }
1801                //
1802                // Eq
1803                //
1804                fEntityReader.skipPastSpaces();
1805                if (!fEntityReader.lookingAtChar('=', true)) {
                        // Nothing is reported and the state is left untouched
                        // once the scanner has reached end of input.
1806                    if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1807                        abortMarkup(XMLMessages.MSG_EQ_REQUIRED_IN_ATTRIBUTE,
1808                                    XMLMessages.P41_EQ_REQUIRED,
1809                                    element.rawname, fAttributeQName.rawname);
1810                        restoreScannerState(previousState);
1811                    }
1812                    return false;
1813                }
1814                fEntityReader.skipPastSpaces();
                    // Three-argument overload, defined outside this chunk. The
                    // meaning of the literal 'false' is not visible here; the
                    // two-argument overload in this class passes
                    // fValidationEnabled in the same position.
1815                int result = scanAttValue(element, fAttributeQName, false);
1816                if (result == RESULT_FAILURE) {
1817                    if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1818                        skipPastEndOfCurrentMarkup();
1819                        restoreScannerState(previousState);
1820                    }
1821                    return false;
1822                } else if (result == RESULT_DUPLICATE_ATTR) {
                        // The duplicate is reported, but scanning does not stop
                        // here: the attribute is still handed to the event
                        // handler below.
1823                    reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1824                                        XMLMessages.WFC_UNIQUE_ATT_SPEC,
1825                                        element.rawname, fAttributeQName.rawname);
1826                }
1827                //The validator will check whether we have a duplicate attr in the start tag.
                    // A true return from the handler is treated as "duplicate".
1828                if ( fEventHandler.attribute(element, fAttributeQName, result) ) {
1829                    reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1830                                        XMLMessages.WFC_UNIQUE_ATT_SPEC,
1831                                        element.rawname, fAttributeQName.rawname);
1832                }
                    // Back from the attribute-name state to the attribute-list state.
1833                restoreScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
                    // Another attribute is only attempted after whitespace.
                    // Without it, look for '>' and, failing that, '/' and then
                    // leave the loop either way.
1834                if (!fEntityReader.lookingAtSpace(true)) {
1835                    if (!(greater = fEntityReader.lookingAtChar('>', true)))
1836                        slash = fEntityReader.lookingAtChar('/', true);
1837                    break;
1838                }
1839            }
1840            restoreScannerState(previousState);
1841        } else {
                // Neither '>' nor whitespace after the name: only '/' (the
                // start of '/>') can still lead to a well-terminated tag.
1842            slash = fEntityReader.lookingAtChar('/', true);
1843        }
            // Accept '>' on its own, or '/' immediately followed by '>'.
1844        if (!greater && (!slash || !fEntityReader.lookingAtChar('>', true))) { // '>' or '/>'
1845            if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1846                abortMarkup(XMLMessages.MSG_ELEMENT_UNTERMINATED,
1847                            XMLMessages.P40_UNTERMINATED,
1848                            element.rawname);
1849            }
1850            return false;
1851        }
1852        fEventHandler.callStartElement(element);
            // Presumably balances an increment made when the opening '<' was
            // scanned; that code is outside this chunk -- TODO confirm.
1853        fScannerMarkupDepth--;
1854        if (slash) { // '/>'
                // Empty-element tag: the element ends immediately.
1855            fEventHandler.callEndElement(fReaderId);
1856            return false;
1857        } else {
1858            return true;
1859        }
1860    }
1861    //
1862    // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1863    //
        // Scans a character reference and returns the character value it
        // denotes, or -1 after reporting a fatal error.
        //
        // The first thing tested is the optional 'x' of a hexadecimal
        // reference, so the method is presumably entered with '&#' already
        // consumed -- confirm against the caller, which is outside this chunk.
        //
1864    int scanCharRef() throws Exception {
            // Start of the reference text; only used to quote the text in the
            // "invalid character reference" error at the end of the method.
1865        int valueOffset = fEntityReader.currentOffset();
1866        boolean hex = fEntityReader.lookingAtChar('x', true);
1867        int num = fEntityReader.scanCharRef(hex);
            // Negative results are compared against the CHARREF_RESULT_* codes
            // of XMLEntityHandler.
1868        if (num < 0) {
1869            switch (num) {
1870            case XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED:
1871                reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_CHARREF,
1872                                    XMLMessages.P66_SEMICOLON_REQUIRED);
1873                return -1;
1874            case XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR:
                    // Pick the "hex digit required" or "digit required" codes
                    // depending on which form of reference is being scanned.
1875                int majorCode = hex ? XMLMessages.MSG_HEXDIGIT_REQUIRED_IN_CHARREF :
1876                                      XMLMessages.MSG_DIGIT_REQUIRED_IN_CHARREF;
1877                int minorCode = hex ? XMLMessages.P66_HEXDIGIT_REQUIRED :
1878                                      XMLMessages.P66_DIGIT_REQUIRED;
1879                reportFatalXMLError(majorCode, minorCode);
1880                return -1;
1881            case XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE:
1882                num = 0x110000; // this will cause the right error to be reported below...
1883                break;
1884            }
                // Any other negative value falls through unchanged; it fails
                // the range check below and is reported as an invalid
                // character reference.
1885        }
1886        //
1887        //  [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]        // any Unicode character, excluding the
1888        //               | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
1889        //
1890        if (num < 0x20) {
                // Below 0x20 only tab, line feed and carriage return are legal.
1891            if (num == 0x09 || num == 0x0A || num == 0x0D) {
1892                return num;
1893            }
1894        } else if (num <= 0xD7FF || (num >= 0xE000 && (num <= 0xFFFD || (num >= 0x10000 && num <= 0x10FFFF)))) {
1895            return num;
1896        }
            // Not a legal Char: report it, quoting the text that was scanned
            // between valueOffset and the current reader position.
1897        int valueLength = fEntityReader.currentOffset() - valueOffset;
1898        reportFatalXMLError(XMLMessages.MSG_INVALID_CHARREF,
1899                            XMLMessages.WFC_LEGAL_CHARACTER,
1900                            fEntityReader.addString(valueOffset, valueLength));
1901        return -1;
1902    }
1903    //
1904    // From the standard:
1905    //
1906    // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1907    //
1908    // Called after scanning past '<!--'
1909    //
        // Scans the body of a comment up to and including the closing '-->'
        // and passes the comment text to fEventHandler.callComment. A run of
        // two or more dashes that is not followed by '>' and a closing run of
        // more than two dashes are both reported as MSG_DASH_DASH_IN_COMMENT,
        // at most once per comment.
        //
1910    void scanComment() throws Exception
1911    {
            // Start of the comment text (the position just after '<!--').
1912        int commentOffset = fEntityReader.currentOffset();
            // Set once the "--" error has been reported for this comment.
1913        boolean sawDashDash = false;
1914        int previousState = setScannerState(SCANNER_STATE_COMMENT);
            // The loop also ends when something changes the scanner state
            // (the code below checks for SCANNER_STATE_END_OF_INPUT).
1915        while (fScannerState == SCANNER_STATE_COMMENT) {
                // Peek for '-' first so that its offset can be recorded before
                // it is consumed two lines further down.
1916            if (fEntityReader.lookingAtChar('-', false)) {
                    // Offset bookkeeping for a run of dashes: nextEndOffset and
                    // endOffset trail the reader position, so that after the
                    // run endOffset holds the offset recorded just before the
                    // second-to-last dash. For a closing '-->' that is where
                    // the comment text ends.
1917                int nextEndOffset = fEntityReader.currentOffset();
1918                int endOffset = 0;
1919                fEntityReader.lookingAtChar('-', true);
1920                int offset = fEntityReader.currentOffset();
                    // Number of consecutive dashes consumed so far.
1921                int count = 1;
1922                while (fEntityReader.lookingAtChar('-', true)) {
1923                    count++;
1924                    endOffset = nextEndOffset;
1925                    nextEndOffset = offset;
1926                    offset = fEntityReader.currentOffset();
1927                }
                    // A single dash is ordinary comment content and needs no
                    // further handling.
1928                if (count > 1) {
1929                    if (fEntityReader.lookingAtChar('>', true)) {
                            // End of comment. More than two dashes before '>'
                            // means the text itself ends in a dash, which the
                            // grammar above does not allow.
1930                        if (!sawDashDash && count > 2) {
1931                            reportFatalXMLError(XMLMessages.MSG_DASH_DASH_IN_COMMENT,
1932                                                XMLMessages.P15_DASH_DASH);
1933                            sawDashDash = true;
1934                        }
                            // Presumably balances an increment made when the
                            // markup was opened -- not visible in this chunk.
1935                        fScannerMarkupDepth--;
1936                        fEventHandler.callComment(fEntityReader.addString(commentOffset, endOffset - commentOffset));
1937                        restoreScannerState(previousState);
1938                        return;
1939                    } else if (!sawDashDash) {
                            // "--" inside the comment body: report it once and
                            // keep scanning.
1940                        reportFatalXMLError(XMLMessages.MSG_DASH_DASH_IN_COMMENT,
1941                                            XMLMessages.P15_DASH_DASH);
1942                        sawDashDash = true;
1943                    }
1944                }
1945            } else {
                    // Any other character: lookingAtValidChar(true) presumably
                    // consumes it when it is a legal character. Otherwise the
                    // offending character is fetched and, unless input has
                    // ended or the returned value is negative, reported.
1946                if (!fEntityReader.lookingAtValidChar(true)) {
1947                    int invChar = fEntityReader.scanInvalidChar();
1948                    if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1949                        if (invChar >= 0) {
1950                            reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_COMMENT,
1951                                                XMLMessages.P15_INVALID_CHARACTER,
1952                                                Integer.toHexString(invChar));
1953                        }
1954                    }
1955                }
1956            }
1957        }
            // Reached only when the loop ended without finding '-->'.
1958        restoreScannerState(previousState);
1959    }
1960    //
1961    // From the standard:
1962    //
1963    // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1964    // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1965    //
        // Scans a processing instruction whose target name has already been
        // scanned and is passed in as a string-pool handle (it is resolved
        // with fStringPool.toString). On success the target and the data are
        // passed to fEventHandler.callProcessingInstruction; on the error
        // paths the method returns without calling the handler.
        //
1966    void scanPI(int piTarget) throws Exception
1967    {
            // Production [17]: a target spelling "xml" in any mix of upper and
            // lower case is reserved.
1968        String piTargetString = fStringPool.toString(piTarget);
1969        if (piTargetString.length() == 3 &&
1970            (piTargetString.charAt(0) == 'X' || piTargetString.charAt(0) == 'x') &&
1971            (piTargetString.charAt(1) == 'M' || piTargetString.charAt(1) == 'm') &&
1972            (piTargetString.charAt(2) == 'L' || piTargetString.charAt(2) == 'l')) {
1973            abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1974                        XMLMessages.P17_RESERVED_PITARGET);
1975            return;
1976        }
1977        int prevState = setScannerState(SCANNER_STATE_PI);
            // piDataLength stays negative until the closing '?>' has been
            // found; the scanning loop below relies on that.
1978        int piDataOffset = -1;
1979        int piDataLength = -1;
1980        if (!fEntityReader.lookingAtSpace(true)) {
                // No whitespace after the target: the only legal continuation
                // is an immediate '?>', i.e. a PI without data.
1981            if (!fEntityReader.lookingAtChar('?', true) || !fEntityReader.lookingAtChar('>', true)) {
1982                if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1983                    abortMarkup(XMLMessages.MSG_SPACE_REQUIRED_IN_PI,
1984                                XMLMessages.P16_WHITESPACE_REQUIRED);
1985                    restoreScannerState(prevState);
1986                }
1987                return;
1988            }
1989            piDataLength = 0;
1990        } else {
1991            fEntityReader.skipPastSpaces();
                // The data starts at the first non-space character.
1992            piDataOffset = fEntityReader.currentOffset();
1993            while (fScannerState == SCANNER_STATE_PI) {
                    // Peek for '?' so that its offset can be recorded before it
                    // is consumed. The inner loop repeats for consecutive
                    // question marks, so in "??>" only the last '?' belongs to
                    // the terminator.
1994                while (fEntityReader.lookingAtChar('?', false)) {
1995                    int offset = fEntityReader.currentOffset();
1996                    fEntityReader.lookingAtChar('?', true);
1997                    if (fEntityReader.lookingAtChar('>', true)) {
                            // The data ends just before the '?' of '?>'.
1998                        piDataLength = offset - piDataOffset;
1999                        break;
2000                    }
2001                }
2002                if (piDataLength >= 0)
2003                    break;
2004                if (!fEntityReader.lookingAtValidChar(true)) {
2005                    int invChar = fEntityReader.scanInvalidChar();
2006                    if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2007                        if (invChar >= 0) {
2008                            reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PI,
2009                                                XMLMessages.P16_INVALID_CHARACTER,
2010                                                Integer.toHexString(invChar));
2011                        }
2012                        skipPastEndOfCurrentMarkup();
2013                        restoreScannerState(prevState);
2014                    }
2015                    return;
2016                }
2017            }
                // NOTE(review): if the outer loop ever ends because the scanner
                // state changed rather than because '?>' was found,
                // piDataLength is still -1 here and is passed to addString
                // below. Confirm whether that path is reachable.
2018        }
            // Presumably balances an increment made when '<?' was scanned --
            // not visible in this chunk.
2019        fScannerMarkupDepth--;
2020        restoreScannerState(prevState);
            // Zero-length data is reported as the pool's shared empty string.
2021        int piData = piDataLength == 0 ?
2022                     StringPool.EMPTY_STRING : fEntityReader.addString(piDataOffset, piDataLength);
2023        fEventHandler.callProcessingInstruction(piTarget, piData);
2024    }
2025
2026    /**
         * Sets whether the parser processes namespaces.
         * <p>
         * The flag is only stored here. Within this class it selects between
         * {@code scanName} and {@code scanQName} when element and attribute
         * names are scanned, and it is handed to the DTD scanner at the moment
         * that scanner is first created.
         * <p>
         * NOTE(review): unlike {@link #setValidationEnabled(boolean)}, this
         * setter does not forward the new value to an already-created
         * {@code fDTDScanner} -- confirm whether that is intentional.
         *
         * @param enabled the new value of the namespaces flag
         */
2027    public void setNamespacesEnabled(boolean enabled) {
2028        fNamespacesEnabled = enabled;
2029    }
2030
2031    /**
         * Returns whether the parser processes namespaces.
         *
         * @return the current value of {@code fNamespacesEnabled}
         */
2032    public boolean getNamespacesEnabled() {
2033        return fNamespacesEnabled;
2034    }
2035
2036    /**
         * Sets whether the parser validates.
         * <p>
         * The value is stored in {@code fValidationEnabled} and, when a DTD
         * scanner has already been created, forwarded to it as well.
         *
         * @param enabled the new value of the validation flag
         */
2037    public void setValidationEnabled(boolean enabled) {
2038        fValidationEnabled = enabled;
            // fDTDScanner is created lazily by scanDoctypeDecl, so it may
            // still be null at this point.
2039        if (fDTDScanner != null) {
2040            fDTDScanner.setValidationEnabled(enabled);
2041        }
2042    }
2043
2044    /**
         * Returns true if validation is turned on.
         *
         * @return the current value of {@code fValidationEnabled}
         */
2045    public boolean getValidationEnabled() {
2046        return fValidationEnabled;
2047    }
2048
2049    // old EventHandler methods pushed back into scanner
2050
2051    /**
         * Scans an element type name into {@code element} and then notifies
         * the event handler through {@code fEventHandler.element(element)}.
         * <p>
         * With namespaces disabled the name is scanned as a plain name and
         * stored as both {@code localpart} and {@code rawname}. With
         * namespaces enabled it is scanned as a qualified name; a ':' directly
         * after the scanned name is reported as a fatal "two colons in QName"
         * error.
         *
         * @param entityReader reader to scan from
         * @param fastchar     passed through to {@code scanName} /
         *                     {@code scanQName}; presumably a character that
         *                     is expected to follow the name -- TODO confirm
         *                     against the reader implementation
         * @param element      receives the scanned name
         * @throws Exception declared by this method; the sources are not
         *                   visible in this chunk
         */
2052    private void scanElementType(XMLEntityHandler.EntityReader entityReader, 
2053                                char fastchar, QName element) throws Exception {
2054
2055        if (!fNamespacesEnabled) {
                // Reset the QName before filling in the two fields set here.
2056            element.clear();
2057            element.localpart = entityReader.scanName(fastchar);
2058            element.rawname = element.localpart;
2059        } 
2060        else {
2061            entityReader.scanQName(fastchar, element);
                // Peek only (second argument false): a ':' at this position
                // means the name contains more than one colon.
2062            if (entityReader.lookingAtChar(':', false)) {
2063                fErrorReporter.reportError(fErrorReporter.getLocator(),
2064                                           XMLMessages.XML_DOMAIN,
2065                                           XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2066                                           XMLMessages.P5_INVALID_CHARACTER,
2067                                           null,
2068                                           XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
                     // Presumably skips the rest of the malformed name so that
                     // scanning can continue -- confirm in the reader.
2069                 entityReader.skipPastNmtoken(' ');
2070            }
2071        }
2072
            // Called on every path, including after the two-colon error.
2073        fEventHandler.element(element);
2074
2075    } // scanElementType(XMLEntityHandler.EntityReader,char,QName)
2076
2077    /**
         * Checks whether the upcoming name in the input is the expected
         * element type.
         * <p>
         * The characters of {@code elementType} are fetched from the string
         * pool into a lazily created, reused range object and handed to
         * {@code entityReader.scanExpectedName}, whose result is returned
         * unchanged.
         *
         * @param entityReader reader to scan from
         * @param fastchar     passed through to {@code scanExpectedName};
         *                     its meaning is defined by the reader, which is
         *                     outside this chunk
         * @param elementType  string-pool handle of the expected name
         * @return the result of {@code scanExpectedName}
         * @throws Exception declared by this method; the sources are not
         *                   visible in this chunk
         */
2078    private boolean scanExpectedElementType(XMLEntityHandler.EntityReader entityReader, 
2079                                           char fastchar, int elementType) 
2080        throws Exception {
2081
            // The marker on the next line and the two markers further down
            // form a comment toggle. As written, the first marker is a
            // complete, empty comment, so the code up to the return statement
            // is live and the scanQName alternative after it is commented out.
2082        /***/
2083        // REVISIT: Why aren't we using the 'element' parameter? -Ac
2084        // REVISIT: I replaced the 'fCurrentElement' with 'element' parameter, still working, 
2085        //          just wondering Why are we using CharArrayRange in the first place? -ericye
            // NOTE(review): the two remarks above mention an 'element'
            // parameter, but the current signature takes an int 'elementType'.
2086        if (fCurrentElementCharArrayRange == null) {
2087            fCurrentElementCharArrayRange = fStringPool.createCharArrayRange();
2088        }
2089        fStringPool.getCharArrayRange(elementType, fCurrentElementCharArrayRange);
2090        return entityReader.scanExpectedName(fastchar, fCurrentElementCharArrayRange);
2091        /***
2092        entityReader.scanQName(fastchar, element);
2093        return true;
2094        /***/
2095
2096    } // scanExpectedElementType(XMLEntityHandler.EntityReader,char,int)
2097
2098    /**
         * Scans an attribute name into {@code attribute}.
         * <p>
         * With namespaces disabled the name is scanned as a plain name and
         * stored as both {@code localpart} and {@code rawname}. With
         * namespaces enabled it is scanned as a qualified name; a ':' directly
         * after the scanned name is reported as a fatal "two colons in QName"
         * error. In both cases '=' is the character passed to the reader's
         * scan method.
         *
         * @param entityReader reader to scan from
         * @param element      the element being scanned; not used by the
         *                     active code (only by the disabled block below)
         * @param attribute    receives the scanned name
         * @throws Exception declared by this method; the sources are not
         *                   visible in this chunk
         */
2099    private void scanAttributeName(XMLEntityHandler.EntityReader entityReader, 
2100                                  QName element, QName attribute) 
2101        throws Exception {
2102
            // Disabled legacy code: everything between the two markers below
            // is inside a block comment.
2103        /***
2104        // REVISIT: What's this check for?
2105        if (!fSeenRootElement) {
2106            fSeenRootElement = true;
2107            rootElementSpecified(element);
2108            fStringPool.resetShuffleCount();
2109        }
2110        /***/
2111
2112        if (!fNamespacesEnabled) {
                // Reset the QName before filling in the two fields set here.
2113            attribute.clear();
2114            attribute.localpart = entityReader.scanName('=');
2115            attribute.rawname = attribute.localpart;
2116        } 
2117        else {
2118            entityReader.scanQName('=', attribute);
                // Peek only (second argument false): a ':' at this position
                // means the name contains more than one colon.
2119            if (entityReader.lookingAtChar(':', false)) {
2120                fErrorReporter.reportError(fErrorReporter.getLocator(),
2121                                           XMLMessages.XML_DOMAIN,
2122                                           XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2123                                           XMLMessages.P5_INVALID_CHARACTER,
2124                                           null,
2125                                           XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
                    // Presumably skips the rest of the malformed name so that
                    // scanning can continue -- confirm in the reader.
2126                entityReader.skipPastNmtoken(' ');
2127            }
2128        }
2129
2130    } // scanAttributeName(XMLEntityHandler.EntityReader,QName,QName)
2131
2132    /**
         * Scans the doctype declaration by delegating to an
         * {@code XMLDTDScanner}.
         * <p>
         * The DTD scanner is created on first use and reset on later calls,
         * each time with a fresh {@code ChunkyCharArray}. It is given the
         * current DTD handler and grammar resolver and then asked to scan the
         * declaration. When validation is enabled the entity handler is asked
         * to check required notations afterwards, whatever the outcome of the
         * scan was.
         * <p>
         * {@code fScanningDTD} is true for the duration of the call. It is not
         * reset when an exception propagates out of this method, because there
         * is no try/finally around the body.
         *
         * @param standalone not used by the active code (see the REVISIT
         *                   remark in the body)
         * @throws Exception declared by this method; the sources are not
         *                   visible in this chunk
         */
2133    private void scanDoctypeDecl(boolean standalone) throws Exception {
2134        
2135        fScanningDTD = true;
2136
            // Disabled legacy code (block comment).
2137        /***
2138        fScanningDTD = true;
2139        fCheckedForSchema = true;
2140        /***/
2141        fSeenDoctypeDecl = true;
            // Disabled legacy code: the DTDImporter-based implementation below
            // is inside one block comment that ends at the marker just before
            // the fDTDScanner null check.
2142        /***
2143        fStandaloneReader = standalone ? fEntityHandler.getReaderId() : -1;
2144        fDeclsAreExternal = false;
2145        if (fDTDImporter == null) {
2146            fDTDImporter = new DTDImporter(fStringPool, fErrorReporter, fEntityHandler, this);
2147        } 
2148        else {
2149            fDTDImporter.reset(fStringPool);
2150        }
2151        fDTDImporter.initHandlers(fDTDHandler);
2152        fDTDImporter.setValidating(fValidating);
2153        fDTDImporter.setNamespacesEnabled(fNamespacesEnabled);
2154        if (fDTDImporter.scanDoctypeDecl(standalone) && fValidating) {
2155            // check declared elements
2156            if (fWarningOnUndeclaredElements) {
2157                // REVISIT: comment out because won't compile 
2158                // checkDeclaredElements();
2159            }
2160
2161            // check required notations
2162            fEntityHandler.checkRequiredNotations();
2163        }
2164        fScanningDTD = false;
2165        /***/
2166        if (fDTDScanner == null) {
                // First use: create the scanner and copy the current flags.
2167            fDTDScanner = new XMLDTDScanner(fStringPool, fErrorReporter, fEntityHandler, new ChunkyCharArray(fStringPool));
2168            fDTDScanner.setValidationEnabled(fValidationEnabled);
2169            fDTDScanner.setNamespacesEnabled(fNamespacesEnabled);
2170        }
2171        else {
                // Reuse: the two flags are not copied again on this path.
2172            fDTDScanner.reset(fStringPool, new ChunkyCharArray(fStringPool));
2173        }
2174        fDTDScanner.setDTDHandler(fDTDHandler);
2175        fDTDScanner.setGrammarResolver(fGrammarResolver);
2176        // REVISIT: What about standalone?
2177        if (fDTDScanner.scanDoctypeDecl()) {
                // Further declarations are scanned here only when the DTD
                // scanner reports that it is reading an external entity.
2178            if (fDTDScanner.getReadingExternalEntity()) {
2179                fDTDScanner.scanDecls(true);
2180            }
2181            // REVISIT: What about validation and checking stuff?
2182        }
2183        //VC_NOTATION_DECLARED
2184        if (fValidationEnabled) {
                // NOTE(review): unchecked cast; this throws ClassCastException
                // if fEntityHandler is not a DefaultEntityHandler. Confirm
                // that no other implementation is ever installed.
2185            ((DefaultEntityHandler)fEntityHandler).checkRequiredNotations();
2186        }
            // The marker on the next line is a complete, empty comment and has
            // no effect.
2187        /***/
2188        fScanningDTD = false;
2189
2190    } // scanDoctypeDecl(boolean)
2191
2192    /**
         * Scans an attribute value and reduces the outcome to a result code.
         * <p>
         * Delegates to the three-argument overload, passing
         * {@code fValidationEnabled} as its third argument. A return value of
         * -1 from that overload is mapped to {@code RESULT_FAILURE}; any other
         * value yields {@code RESULT_SUCCESS}. The rest of the body is
         * commented-out legacy code, so this overload never returns
         * {@code RESULT_DUPLICATE_ATTR}.
         *
         * @param element   the element the attribute belongs to
         * @param attribute the attribute whose value is scanned
         * @return {@code RESULT_FAILURE} or {@code RESULT_SUCCESS}
         * @throws Exception declared by this method; the sources are not
         *                   visible in this chunk
         */
2193    private int scanAttValue(QName element, QName attribute) throws Exception {
2194
2195        //fAttrNameLocator = getLocatorImpl(fAttrNameLocator);
2196        int attValue = scanAttValue(element, attribute, fValidationEnabled);
2197        if (attValue == -1) {
2198            return XMLDocumentScanner.RESULT_FAILURE;
2199        }
2200
2201
            // Everything from here down to the final return statement is
            // disabled: three consecutive block comments holding the former
            // attribute-list and validation handling.
2202        /***
2203        // REVISIT: This is validation related.
2204        if (!fValidating && fAttDefCount == 0) {
2205            int attType = fCDATASymbol;
2206            if (fAttrListHandle == -1)
2207                fAttrListHandle = fAttrList.startAttrList();
2208            // REVISIT: Should this be localpart or rawname?
2209            if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2210                return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2211            }
2212            return XMLDocumentScanner.RESULT_SUCCESS;
2213        }
2214        /****/
2215
2216        /****
2217        // REVISIT: Validation. What should these be?
2218        int attDefIndex = getAttDef(element, attribute);
2219        if (attDefIndex == -1) {
2220
2221            if (fValidating) {
2222                // REVISIT - cache the elem/attr tuple so that we only give
2223                //  this error once for each unique occurrence
2224                Object[] args = { fStringPool.toString(element.rawname),
2225                                  fStringPool.toString(attribute.rawname) };
2226                fErrorReporter.reportError(fAttrNameLocator,
2227                                           XMLMessages.XML_DOMAIN,
2228                                           XMLMessages.MSG_ATTRIBUTE_NOT_DECLARED,
2229                                           XMLMessages.VC_ATTRIBUTE_VALUE_TYPE,
2230                                           args,
2231                                           XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
2232            }
2233
2234            int attType = fCDATASymbol;
2235            if (fAttrListHandle == -1) {
2236                fAttrListHandle = fAttrList.startAttrList();
2237            }
2238            // REVISIT: Validation. What should the name be?
2239            if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2240                return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2241            }
2242            return XMLDocumentScanner.RESULT_SUCCESS;
2243        }
2244        /****/
2245
2246        /****
2247        int attType = getAttType(attDefIndex);
2248        if (attType != fCDATASymbol) {
2249            AttributeValidator av = getAttributeValidator(attDefIndex);
2250            int enumHandle = getEnumeration(attDefIndex);
2251            // REVISIT: Validation. What should these be?
2252            attValue = av.normalize(element, attribute, 
2253                                    attValue, attType, enumHandle);
2254        }
2255
2256        if (fAttrListHandle == -1) {
2257            fAttrListHandle = fAttrList.startAttrList();
2258        }
2259        // REVISIT: Validation. What should the name be?
2260        if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2261            return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2262        }
2263        /***/
2264
2265        return XMLDocumentScanner.RESULT_SUCCESS;
2266
2267    } // scanAttValue(QName,QName):int
2268
2269    /**
         * Returns true if the version number is valid.
         * <p>
         * Thin wrapper around {@code XMLCharacterProperties.validVersionNum};
         * the validity rules themselves are defined there.
         *
         * @param version the version string to check
         * @return the result of {@code XMLCharacterProperties.validVersionNum}
         */
2270    private boolean validVersionNum(String version) {
2271        return XMLCharacterProperties.validVersionNum(version);
2272    }
2273
2274    /**
         * Returns true if the encoding name is valid.
         * <p>
         * Thin wrapper around {@code XMLCharacterProperties.validEncName};
         * the validity rules themselves are defined there.
         *
         * @param encoding the encoding name to check
         * @return the result of {@code XMLCharacterProperties.validEncName}
         */
2275    private boolean validEncName(String encoding) {
2276        return XMLCharacterProperties.validEncName(encoding);
2277    }
2278
2279} // class XMLDocumentScanner