Source code: org/apache/xerces/framework/XMLDocumentScanner.java
1 /*
2 * The Apache Software License, Version 1.1
3 *
4 *
5 * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6 * reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Apache Software Foundation (http://www.apache.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Xerces" and "Apache Software Foundation" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact apache@apache.org.
31 *
32 * 5. Products derived from this software may not be called "Apache",
33 * nor may "Apache" appear in their name, without prior written
34 * permission of the Apache Software Foundation.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This software consists of voluntary contributions made by many
51 * individuals on behalf of the Apache Software Foundation and was
52 * originally based on software copyright (c) 1999, International
53 * Business Machines, Inc., http://www.apache.org. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58 package org.apache.xerces.framework;
59
60 import org.apache.xerces.readers.XMLEntityHandler;
61 import org.apache.xerces.readers.DefaultEntityHandler;
62 import org.apache.xerces.utils.ChunkyCharArray;
63 import org.apache.xerces.utils.QName;
64 import org.apache.xerces.utils.StringPool;
65 import org.apache.xerces.utils.XMLCharacterProperties;
66 import org.apache.xerces.utils.XMLMessages;
67 import org.apache.xerces.validators.common.GrammarResolver;
68
69 import org.xml.sax.Locator;
70 import org.xml.sax.SAXParseException;
71
72 /**
73 * This class recognizes most of the grammar for an XML processor.
74 * Additional support is provided by the XMLEntityHandler, via the
75 * XMLEntityReader instances it creates, which are used to process
76 * simple constructs like string literals and character data between
77 * markup. The XMLDTDScanner class contains the remaining support
78 * for the grammar of DTD declarations. When a <!DOCTYPE ...> is
79 * found in the document, the scanDoctypeDecl method will then be
80 * called and the XMLDocumentScanner subclass is responsible for
81 * "connecting" that method to the corresponding method provided
82 * by the XMLDTDScanner class.
83 *
84 * @version $Id: XMLDocumentScanner.java,v 1.3 2000/10/07 18:06:53 markd Exp $
85 */
86 public final class XMLDocumentScanner {
87 //
88 // Constants
89 //
90
91 //
92 // These character arrays are used as parameters for calls to the
93 // XMLEntityHandler.EntityReader skippedString() method. Some have
94 // package access for use by the inner dispatcher classes.
95 //
96
97 //
98 // [19] CDStart ::= '<![CDATA['
99 //
100 static final char[] cdata_string = { '[','C','D','A','T','A','[' };
101 //
102 // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
103 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
104 //
105 static final char[] xml_string = { 'x','m','l' };
106 //
107 // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
108 //
109 private static final char[] version_string = { 'v','e','r','s','i','o','n' };
110 //
111 // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
112 // ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
113 //
114 static final char[] doctype_string = { 'D','O','C','T','Y','P','E' };
115 //
116 // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
117 // | ('"' ('yes' | 'no') '"'))
118 //
119 private static final char[] standalone_string = { 's','t','a','n','d','a','l','o','n','e' };
120 //
121 // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
122 //
123 private static final char[] encoding_string = { 'e','n','c','o','d','i','n','g' };
124
125 /*
126 * Return values for the EventHandler scanAttValue method.
127 */
128 public static final int
129 RESULT_SUCCESS = 0,
130 RESULT_FAILURE = -1,
131 RESULT_DUPLICATE_ATTR = -2;
132
133 /** Scanner states */
134 static final int
135 SCANNER_STATE_XML_DECL = 0,
136 SCANNER_STATE_START_OF_MARKUP = 1,
137 SCANNER_STATE_COMMENT = 2,
138 SCANNER_STATE_PI = 3,
139 SCANNER_STATE_DOCTYPE = 4,
140 SCANNER_STATE_PROLOG = 5,
141 SCANNER_STATE_ROOT_ELEMENT = 6,
142 SCANNER_STATE_CONTENT = 7,
143 SCANNER_STATE_REFERENCE = 8,
144 SCANNER_STATE_ATTRIBUTE_LIST = 9,
145 SCANNER_STATE_ATTRIBUTE_NAME = 10,
146 SCANNER_STATE_ATTRIBUTE_VALUE = 11,
147 SCANNER_STATE_TRAILING_MISC = 12,
148 SCANNER_STATE_END_OF_INPUT = 13,
149 SCANNER_STATE_TERMINATED = 14;
150
151 //
152 // Instance Variables
153 //
154 /***/
155 // NOTE: Used by old implementation of scanElementType method. -Ac
156 private StringPool.CharArrayRange fCurrentElementCharArrayRange = null;
157 /***/
158 int fAttrListHandle = -1;
159 XMLAttrList fAttrList = null;
160 GrammarResolver fGrammarResolver = null;
161 XMLDTDScanner fDTDScanner = null;
162 boolean fNamespacesEnabled = false;
163 boolean fValidationEnabled = false;
164 QName fElementQName = new QName();
165 QName fAttributeQName = new QName();
166 QName fCurrentElementQName = new QName();
167 ScannerDispatcher fDispatcher = null;
168 EventHandler fEventHandler = null;
169 XMLDocumentHandler.DTDHandler fDTDHandler = null;
170 StringPool fStringPool = null;
171 XMLErrorReporter fErrorReporter = null;
172 XMLEntityHandler fEntityHandler = null;
173 XMLEntityHandler.EntityReader fEntityReader = null;
174 XMLEntityHandler.CharBuffer fLiteralData = null;
175 boolean fSeenRootElement = false;
176 boolean fSeenDoctypeDecl = false;
177 boolean fStandalone = false;
178 boolean fParseTextDecl = false;
179 boolean fScanningDTD = false;
180 int fScannerState = SCANNER_STATE_XML_DECL;
181 int fReaderId = -1;
182 int fAttValueReader = -1;
183 int fAttValueElementType = -1;
184 int fAttValueAttrName = -1;
185 int fAttValueOffset = -1;
186 int fAttValueMark = -1;
187 int fScannerMarkupDepth = 0;
188
189 //
190 // Interfaces
191 //
192
193 /**
194 * This interface must be implemented by the users of the XMLDocumentScanner class.
195 * These methods form the abstraction between the implementation semantics and the
196 * more generic task of scanning the XML non-DTD grammar.
197 */
198 public interface EventHandler {
199 /**
200 * Signal standalone = "yes"
201 *
202 * @exception java.lang.Exception
203 */
204 public void callStandaloneIsYes() throws Exception;
205
206 /**
207 * Signal the start of a document
208 *
209 * @exception java.lang.Exception
210 */
211 public void callStartDocument() throws Exception;
212 /**
213 * Signal the end of a document
214 *
215 * @exception java.lang.Exception
216 */
217 public void callEndDocument() throws Exception;
218 /**
219 * Signal the XML declaration of a document
220 *
221 * @param version the handle in the string pool for the version number
222 * @param encoding the handle in the string pool for the encoding
223 * @param standalong the handle in the string pool for the standalone value
224 * @exception java.lang.Exception
225 */
226 public void callXMLDecl(int version, int encoding, int standalone) throws Exception;
227 /**
228 * Signal the Text declaration of an external entity.
229 *
230 * @param version the handle in the string pool for the version number
231 * @param encoding the handle in the string pool for the encoding
232 * @exception java.lang.Exception
233 */
234 public void callTextDecl(int version, int encoding) throws Exception;
235 /**
236 * signal the scanning of a start element tag
237 *
238 * @param element Element name scanned.
239 * @exception java.lang.Exception
240 */
241 public void callStartElement(QName element) throws Exception;
242 /**
243 * Signal the scanning of an element name in a start element tag.
244 *
245 * @param element Element name scanned.
246 */
247 public void element(QName element) throws Exception;
248 /**
249 * Signal the scanning of an attribute associated to the previous
250 * start element tag.
251 *
252 * @param element Element name scanned.
253 * @param attrName Attribute name scanned.
254 * @param attrValue The string pool index of the attribute value.
255 */
256 public boolean attribute(QName element, QName attrName, int attrValue) throws Exception;
257 /**
258 * signal the scanning of an end element tag
259 *
260 * @param readerId the Id of the reader being used to scan the end tag.
261 * @exception java.lang.Exception
262 */
263 public void callEndElement(int readerId) throws Exception;
264 /**
265 * Signal the start of a CDATA section
266 * @exception java.lang.Exception
267 */
268 public void callStartCDATA() throws Exception;
269 /**
270 * Signal the end of a CDATA section
271 * @exception java.lang.Exception
272 */
273 public void callEndCDATA() throws Exception;
274 /**
275 * Report the scanning of character data
276 *
277 * @param ch the handle in the string pool of the character data that was scanned
278 * @exception java.lang.Exception
279 */
280 public void callCharacters(int ch) throws Exception;
281 /**
282 * Report the scanning of a processing instruction
283 *
284 * @param piTarget the handle in the string pool of the processing instruction targe
285 * @param piData the handle in the string pool of the processing instruction data
286 * @exception java.lang.Exception
287 */
288 public void callProcessingInstruction(int piTarget, int piData) throws Exception;
289 /**
290 * Report the scanning of a comment
291 *
292 * @param data the handle in the string pool of the comment text
293 * @exception java.lang.Exception
294 */
295 public void callComment(int data) throws Exception;
296 }
297
298 /**
299 * Constructor
300 */
301 public XMLDocumentScanner(StringPool stringPool,
302 XMLErrorReporter errorReporter,
303 XMLEntityHandler entityHandler,
304 XMLEntityHandler.CharBuffer literalData) {
305 fStringPool = stringPool;
306 fErrorReporter = errorReporter;
307 fEntityHandler = entityHandler;
308 fLiteralData = literalData;
309 fDispatcher = new XMLDeclDispatcher();
310 fAttrList = new XMLAttrList(fStringPool);
311 }
312
313 /**
314 * Set the event handler
315 *
316 * @param eventHandler The place to send our callbacks.
317 */
318 public void setEventHandler(XMLDocumentScanner.EventHandler eventHandler) {
319 fEventHandler = eventHandler;
320 }
321
322 /** Set the DTD handler. */
323 public void setDTDHandler(XMLDocumentHandler.DTDHandler dtdHandler) {
324 fDTDHandler = dtdHandler;
325 }
326
327 /** Sets the grammar resolver. */
328 public void setGrammarResolver(GrammarResolver resolver) {
329 fGrammarResolver = resolver;
330 }
331
332 /**
333 * reset the parser so that the instance can be reused
334 *
335 * @param stringPool the string pool instance to be used by the reset parser
336 */
337 public void reset(StringPool stringPool, XMLEntityHandler.CharBuffer literalData) {
338 fStringPool = stringPool;
339 fLiteralData = literalData;
340 fParseTextDecl = false;
341 fSeenRootElement = false;
342 fSeenDoctypeDecl = false;
343 fStandalone = false;
344 fScanningDTD = false;
345 fDispatcher = new XMLDeclDispatcher();
346 fScannerState = SCANNER_STATE_XML_DECL;
347 fScannerMarkupDepth = 0;
348 fAttrList = new XMLAttrList(fStringPool);
349 }
350
351 //
352 // From the standard:
353 //
354 // [1] document ::= prolog element Misc*
355 //
356 // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
357 // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
358 // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
359 //
360 // The beginning of XMLDecl simplifies to:
361 // '<?xml' S ...
362 //
363 // [27] Misc ::= Comment | PI | S
364 // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
365 // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
366 // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
367 //
368 // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
369 // ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
370 //
371 /**
372 * Entry point for parsing
373 *
374 * @param doItAll if true the entire document is parsed otherwise just
375 * the next segment of the document is parsed
376 */
377 public boolean parseSome(boolean doItAll) throws Exception
378 {
379 do {
380 if (!fDispatcher.dispatch(doItAll))
381 return false;
382 } while (doItAll);
383 return true;
384 }
385
386 /**
387 * Change readers
388 *
389 * @param nextReader the new reader that the scanner will use
390 * @param nextReaderId id of the reader to change to
391 * @exception throws java.lang.Exception
392 */
393 public void readerChange(XMLEntityHandler.EntityReader nextReader, int nextReaderId) throws Exception {
394 fEntityReader = nextReader;
395 fReaderId = nextReaderId;
396 if (fScannerState == SCANNER_STATE_ATTRIBUTE_VALUE) {
397 fAttValueOffset = fEntityReader.currentOffset();
398 fAttValueMark = fAttValueOffset;
399 }
400
401 //also propagate the change to DTDScanner if there is one
402 if (fDTDScanner != null && fScanningDTD)
403 fDTDScanner.readerChange(nextReader, nextReaderId);
404 }
405
406 /**
407 * Handle the end of input
408 *
409 * @param entityName the handle in the string pool of the name of the entity which has reached end of input
410 * @param moreToFollow if true, there is still input left to process in other readers
411 * @exception java.lang.Exception
412 */
413 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
414 if (fDTDScanner != null && fScanningDTD){
415 fDTDScanner.endOfInput(entityName, moreToFollow);
416 }
417 fDispatcher.endOfInput(entityName, moreToFollow);
418 }
419
420 /**
421 * Tell if scanner has reached end of input
422 * @return true if scanner has reached end of input.
423 */
424 public boolean atEndOfInput() {
425 return fScannerState == SCANNER_STATE_END_OF_INPUT;
426 }
427
428 //
429 // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
430 //
431 /**
432 * Scan an attribute value
433 *
434 * @param elementType handle to the element whose attribute value is being scanned
435 * @param attrName handle in the string pool of the name of attribute being scanned
436 * @param asSymbol controls whether the value is a string (duplicates allowed) or a symbol (duplicates not allowed)
437 * @return handle in the string pool of the scanned value
438 * @exception java.lang.Exception
439 */
440 public int scanAttValue(QName element, QName attribute, boolean asSymbol) throws Exception {
441 boolean single;
442 if (!(single = fEntityReader.lookingAtChar('\'', true)) && !fEntityReader.lookingAtChar('\"', true)) {
443 reportFatalXMLError(XMLMessages.MSG_QUOTE_REQUIRED_IN_ATTVALUE,
444 XMLMessages.P10_QUOTE_REQUIRED,
445 element.rawname,
446 attribute.rawname);
447 return -1;
448 }
449 char qchar = single ? '\'' : '\"';
450 fAttValueMark = fEntityReader.currentOffset();
451 int attValue = fEntityReader.scanAttValue(qchar, asSymbol);
452 if (attValue >= 0)
453 return attValue;
454 int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_VALUE);
455 fAttValueReader = fReaderId;
456 // REVISIT: What should this be?
457 fAttValueElementType = element.rawname;
458 // REVISIT: What should this be?
459 fAttValueAttrName = attribute.rawname;
460 fAttValueOffset = fEntityReader.currentOffset();
461 int dataOffset = fLiteralData.length();
462 if (fAttValueOffset - fAttValueMark > 0)
463 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
464 fAttValueMark = fAttValueOffset;
465 boolean setMark = false;
466 boolean skippedCR;
467 while (true) {
468 if (fEntityReader.lookingAtChar(qchar, true)) {
469 if (fReaderId == fAttValueReader)
470 break;
471 } else if (fEntityReader.lookingAtChar(' ', true)) {
472 //
473 // no action required
474 //
475 } else if ((skippedCR = fEntityReader.lookingAtChar((char)0x0D, true)) || fEntityReader.lookingAtSpace(true)) {
476 if (fAttValueOffset - fAttValueMark > 0)
477 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
478 setMark = true;
479 fLiteralData.append(' ');
480 if (skippedCR) {
481 //
482 // REVISIT - HACK !!! code changed to pass incorrect OASIS test 'valid-sa-110'
483 // Uncomment the next line to conform to the spec...
484 //
485 //fEntityReader.lookingAtChar((char)0x0A, true);
486 }
487 } else if (fEntityReader.lookingAtChar('&', true)) {
488 if (fAttValueOffset - fAttValueMark > 0)
489 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
490 setMark = true;
491 //
492 // Check for character reference first.
493 //
494 if (fEntityReader.lookingAtChar('#', true)) {
495 int ch = scanCharRef();
496 if (ch != -1) {
497 if (ch < 0x10000)
498 fLiteralData.append((char)ch);
499 else {
500 fLiteralData.append((char)(((ch-0x00010000)>>10)+0xd800));
501 fLiteralData.append((char)(((ch-0x00010000)&0x3ff)+0xdc00));
502 }
503 }
504 } else {
505 //
506 // Entity reference
507 //
508 int nameOffset = fEntityReader.currentOffset();
509 fEntityReader.skipPastName(';');
510 int nameLength = fEntityReader.currentOffset() - nameOffset;
511 if (nameLength == 0) {
512 reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
513 XMLMessages.P68_NAME_REQUIRED);
514 } else if (!fEntityReader.lookingAtChar(';', true)) {
515 reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
516 XMLMessages.P68_SEMICOLON_REQUIRED,
517 fEntityReader.addString(nameOffset, nameLength));
518 } else {
519 int entityName = fEntityReader.addSymbol(nameOffset, nameLength);
520 fEntityHandler.startReadingFromEntity(entityName, fScannerMarkupDepth, XMLEntityHandler.ENTITYREF_IN_ATTVALUE);
521 }
522 }
523 } else if (fEntityReader.lookingAtChar('<', true)) {
524 if (fAttValueOffset - fAttValueMark > 0)
525 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
526 setMark = true;
527 reportFatalXMLError(XMLMessages.MSG_LESSTHAN_IN_ATTVALUE,
528 XMLMessages.WFC_NO_LESSTHAN_IN_ATTVALUE,
529 element.rawname,
530 attribute.rawname);
531 } else if (!fEntityReader.lookingAtValidChar(true)) {
532 if (fAttValueOffset - fAttValueMark > 0)
533 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
534 setMark = true;
535 int invChar = fEntityReader.scanInvalidChar();
536 if (fScannerState == SCANNER_STATE_END_OF_INPUT)
537 return -1;
538 if (invChar >= 0) {
539 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_ATTVALUE,
540 XMLMessages.P10_INVALID_CHARACTER,
541 fStringPool.toString(element.rawname),
542 fStringPool.toString(attribute.rawname),
543 Integer.toHexString(invChar));
544 }
545 }
546 fAttValueOffset = fEntityReader.currentOffset();
547 if (setMark) {
548 fAttValueMark = fAttValueOffset;
549 setMark = false;
550 }
551 }
552 restoreScannerState(previousState);
553 int dataLength = fLiteralData.length() - dataOffset;
554 if (dataLength == 0) {
555 return fEntityReader.addString(fAttValueMark, fAttValueOffset - fAttValueMark);
556 }
557 if (fAttValueOffset - fAttValueMark > 0) {
558 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
559 dataLength = fLiteralData.length() - dataOffset;
560 }
561 int value = fLiteralData.addString(dataOffset, dataLength);
562 return value;
563 }
564
565 /**
566 * Check the value of an XML Language attribute
567 * @param langValue the handle in the string pool of the value to be checked
568 * @exception java.lang.Exception
569 */
570 public void checkXMLLangAttributeValue(int langValue) throws Exception {
571 String lang = fStringPool.toString(langValue);
572 int offset = -1;
573 if (lang.length() >= 2) {
574 char ch0 = lang.charAt(0);
575 if (lang.charAt(1) == '-') {
576 if (ch0 == 'i' || ch0 == 'I' || ch0 == 'x' || ch0 == 'X') {
577 offset = 1;
578 }
579 } else {
580 char ch1 = lang.charAt(1);
581 if (((ch0 >= 'a' && ch0 <= 'z') || (ch0 >= 'A' && ch0 <= 'Z')) &&
582 ((ch1 >= 'a' && ch1 <= 'z') || (ch1 >= 'A' && ch1 <= 'Z'))) {
583 offset = 2;
584 }
585 }
586 }
587 if (offset > 0 && lang.length() > offset) {
588 char ch = lang.charAt(offset++);
589 if (ch != '-') {
590 offset = -1;
591 } else {
592 while (true) {
593 if (ch == '-') {
594 if (lang.length() == offset) {
595 offset = -1;
596 break;
597 }
598 ch = lang.charAt(offset++);
599 if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')) {
600 offset = -1;
601 break;
602 }
603 if (lang.length() == offset)
604 break;
605 } else if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')) {
606 offset = -1;
607 break;
608 } else if (lang.length() == offset)
609 break;
610 ch = lang.charAt(offset++);
611 }
612 }
613 }
614 if (offset == -1) {
615 reportFatalXMLError(XMLMessages.MSG_XML_LANG_INVALID,
616 XMLMessages.P33_INVALID,
617 lang);
618 }
619 }
620
621 //
622 //
623 //
624 void reportFatalXMLError(int majorCode, int minorCode) throws Exception {
625 fErrorReporter.reportError(fErrorReporter.getLocator(),
626 XMLMessages.XML_DOMAIN,
627 majorCode,
628 minorCode,
629 null,
630 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
631 }
632 void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1) throws Exception {
633 Object[] args = { fStringPool.toString(stringIndex1) };
634 fErrorReporter.reportError(fErrorReporter.getLocator(),
635 XMLMessages.XML_DOMAIN,
636 majorCode,
637 minorCode,
638 args,
639 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
640 }
641 void reportFatalXMLError(int majorCode, int minorCode, String string1) throws Exception {
642 Object[] args = { string1 };
643 fErrorReporter.reportError(fErrorReporter.getLocator(),
644 XMLMessages.XML_DOMAIN,
645 majorCode,
646 minorCode,
647 args,
648 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
649 }
650 void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception {
651 Object[] args = { fStringPool.toString(stringIndex1),
652 fStringPool.toString(stringIndex2) };
653 fErrorReporter.reportError(fErrorReporter.getLocator(),
654 XMLMessages.XML_DOMAIN,
655 majorCode,
656 minorCode,
657 args,
658 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
659 }
660 void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2) throws Exception {
661 Object[] args = { string1, string2 };
662 fErrorReporter.reportError(fErrorReporter.getLocator(),
663 XMLMessages.XML_DOMAIN,
664 majorCode,
665 minorCode,
666 args,
667 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
668 }
669 void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2, String string3) throws Exception {
670 Object[] args = { string1, string2, string3 };
671 fErrorReporter.reportError(fErrorReporter.getLocator(),
672 XMLMessages.XML_DOMAIN,
673 majorCode,
674 minorCode,
675 args,
676 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
677 }
678 void abortMarkup(int majorCode, int minorCode) throws Exception {
679 reportFatalXMLError(majorCode, minorCode);
680 skipPastEndOfCurrentMarkup();
681 }
682 void abortMarkup(int majorCode, int minorCode, int stringIndex1) throws Exception {
683 reportFatalXMLError(majorCode, minorCode, stringIndex1);
684 skipPastEndOfCurrentMarkup();
685 }
686 void abortMarkup(int majorCode, int minorCode, String string1) throws Exception {
687 reportFatalXMLError(majorCode, minorCode, string1);
688 skipPastEndOfCurrentMarkup();
689 }
690 void abortMarkup(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception {
691 reportFatalXMLError(majorCode, minorCode, stringIndex1, stringIndex2);
692 skipPastEndOfCurrentMarkup();
693 }
694 void skipPastEndOfCurrentMarkup() throws Exception {
695 fEntityReader.skipToChar('>');
696 if (fEntityReader.lookingAtChar('>', true))
697 fScannerMarkupDepth--;
698 }
699 //
700 //
701 //
702 int setScannerState(int state) {
703 int oldState = fScannerState;
704 fScannerState = state;
705 return oldState;
706 }
707 void restoreScannerState(int state) {
708 if (fScannerState != SCANNER_STATE_END_OF_INPUT)
709 fScannerState = state;
710 }
711 //
712 //
713 //
714 /**
715 * The main loop of the scanner is implemented by calling the dispatch method
716 * of ScannerDispatcher with a flag which tells the dispatcher whether to continue
717 * or return. The scanner logic is split up into dispatchers for various syntatic
718 * components of XML. //REVISIT more rationale needed
719 */
720 interface ScannerDispatcher {
721 /**
722 * scan an XML syntactic component
723 *
724 * @param keepgoing if true continue on to the next dispatcher, otherwise return
725 * @return true if scanning was successful //REVISIT - does it ever return false or does it just throw?
726 * @exception java.lang.Exception
727 */
728 boolean dispatch(boolean keepgoing) throws Exception;
729 /**
730 * endOfInput encapsulates the end of entity handling for each dispatcher
731 *
732 * @param entityName StringPool handle of the entity that has reached the end
733 * @param moreToFollow true if there is more input to be read
734 * @exception
735 */
736 void endOfInput(int entityName, boolean moreToFollow) throws Exception;
737 }
738 final class XMLDeclDispatcher implements ScannerDispatcher {
739 public boolean dispatch(boolean keepgoing) throws Exception {
740 fEventHandler.callStartDocument();
741 if (fEntityReader.lookingAtChar('<', true)) {
742 fScannerMarkupDepth++;
743 setScannerState(SCANNER_STATE_START_OF_MARKUP);
744 if (fEntityReader.lookingAtChar('?', true)) {
745 int piTarget = fEntityReader.scanName(' ');
746 if (piTarget == -1) {
747 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
748 XMLMessages.P16_PITARGET_REQUIRED);
749 } else if ("xml".equals(fStringPool.toString(piTarget))) {
750 if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
751 scanXMLDeclOrTextDecl(false);
752 } else { // a PI target matching 'xml'
753 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
754 XMLMessages.P17_RESERVED_PITARGET);
755 }
756 } else { // PI
757 scanPI(piTarget);
758 }
759 fDispatcher = new PrologDispatcher();
760 restoreScannerState(SCANNER_STATE_PROLOG);
761 return true;
762 }
763 if (fEntityReader.lookingAtChar('!', true)) {
764 if (fEntityReader.lookingAtChar('-', true)) { // comment ?
765 if (fEntityReader.lookingAtChar('-', true)) {
766 scanComment(); // scan through the closing '-->'
767 } else {
768 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
769 XMLMessages.P22_NOT_RECOGNIZED);
770 }
771 } else {
772 if (fEntityReader.skippedString(doctype_string)) {
773 setScannerState(SCANNER_STATE_DOCTYPE);
774 fSeenDoctypeDecl = true;
775 scanDoctypeDecl(fStandalone); // scan through the closing '>'
776 fScannerMarkupDepth--;
777 fDispatcher = new PrologDispatcher();
778 restoreScannerState(SCANNER_STATE_PROLOG);
779 return true;
780 } else {
781 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
782 XMLMessages.P22_NOT_RECOGNIZED);
783 }
784 }
785 } else {
786 fDispatcher = new ContentDispatcher();
787 restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
788 return true;
789 }
790 } else {
791 if (fEntityReader.lookingAtSpace(true)) {
792 fEntityReader.skipPastSpaces();
793 } else if (!fEntityReader.lookingAtValidChar(false)) {
794 int invChar = fEntityReader.scanInvalidChar();
795 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
796 if (invChar >= 0) {
797 String arg = Integer.toHexString(invChar);
798 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
799 XMLMessages.P22_INVALID_CHARACTER,
800 arg);
801 }
802 }
803 } else {
804 reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
805 XMLMessages.P22_NOT_RECOGNIZED);
806 fEntityReader.lookingAtValidChar(true);
807 }
808 }
809 fDispatcher = new PrologDispatcher();
810 restoreScannerState(SCANNER_STATE_PROLOG);
811 return true;
812 }
813 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
814 switch (fScannerState) {
815 case SCANNER_STATE_XML_DECL:
816 case SCANNER_STATE_START_OF_MARKUP:
817 case SCANNER_STATE_DOCTYPE:
818 break;
819 case SCANNER_STATE_COMMENT:
820 if (!moreToFollow) {
821 reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
822 XMLMessages.P15_UNTERMINATED);
823 } else {
824 reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
825 XMLMessages.P78_NOT_WELLFORMED);
826 }
827 break;
828 case SCANNER_STATE_PI:
829 if (!moreToFollow) {
830 reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
831 XMLMessages.P16_UNTERMINATED);
832 } else {
833 reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
834 XMLMessages.P78_NOT_WELLFORMED);
835 }
836 break;
837 default:
838 throw new RuntimeException("FWK001 1] ScannerState="+fScannerState+"\n" + "1\t"+fScannerState);
839 }
840 if (!moreToFollow) {
841 reportFatalXMLError(XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
842 XMLMessages.P1_ELEMENT_REQUIRED);
843 fDispatcher = new EndOfInputDispatcher();
844 setScannerState(SCANNER_STATE_END_OF_INPUT);
845 }
846 }
847 }
848 final class PrologDispatcher implements ScannerDispatcher {
849 public boolean dispatch(boolean keepgoing) throws Exception {
850 do {
851 if (fEntityReader.lookingAtChar('<', true)) {
852 fScannerMarkupDepth++;
853 setScannerState(SCANNER_STATE_START_OF_MARKUP);
854 if (fEntityReader.lookingAtChar('?', true)) {
855 int piTarget = fEntityReader.scanName(' ');
856 if (piTarget == -1) {
857 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
858 XMLMessages.P16_PITARGET_REQUIRED);
859 } else if ("xml".equals(fStringPool.toString(piTarget))) {
860 if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
861 abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
862 XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
863 } else { // a PI target matching 'xml'
864 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
865 XMLMessages.P17_RESERVED_PITARGET);
866 }
867 } else { // PI
868 scanPI(piTarget);
869 }
870 } else if (fEntityReader.lookingAtChar('!', true)) {
871 if (fEntityReader.lookingAtChar('-', true)) { // comment ?
872 if (fEntityReader.lookingAtChar('-', true)) {
873 scanComment(); // scan through the closing '-->'
874 } else {
875 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
876 XMLMessages.P22_NOT_RECOGNIZED);
877 }
878 } else {
879 if (!fSeenDoctypeDecl && fEntityReader.skippedString(doctype_string)) {
880 setScannerState(SCANNER_STATE_DOCTYPE);
881 fSeenDoctypeDecl = true;
882 scanDoctypeDecl(fStandalone); // scan through the closing '>'
883 fScannerMarkupDepth--;
884 } else {
885 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
886 XMLMessages.P22_NOT_RECOGNIZED);
887 }
888 }
889 } else {
890 fDispatcher = new ContentDispatcher();
891 restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
892 return true;
893 }
894 restoreScannerState(SCANNER_STATE_PROLOG);
895 } else if (fEntityReader.lookingAtSpace(true)) {
896 fEntityReader.skipPastSpaces();
897 } else if (!fEntityReader.lookingAtValidChar(false)) {
898 int invChar = fEntityReader.scanInvalidChar();
899 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
900 if (invChar >= 0) {
901 String arg = Integer.toHexString(invChar);
902 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
903 XMLMessages.P22_INVALID_CHARACTER,
904 arg);
905 }
906 }
907 } else {
908 reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
909 XMLMessages.P22_NOT_RECOGNIZED);
910 fEntityReader.lookingAtValidChar(true);
911 }
912 } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
913 return true;
914 }
915 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
916 switch (fScannerState) {
917 case SCANNER_STATE_PROLOG:
918 case SCANNER_STATE_START_OF_MARKUP:
919 case SCANNER_STATE_DOCTYPE:
920 break;
921 case SCANNER_STATE_COMMENT:
922 if (!moreToFollow) {
923 reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
924 XMLMessages.P15_UNTERMINATED);
925 } else {
926 reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
927 XMLMessages.P78_NOT_WELLFORMED);
928 }
929 break;
930 case SCANNER_STATE_PI:
931 if (!moreToFollow) {
932 reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
933 XMLMessages.P16_UNTERMINATED);
934 } else {
935 reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
936 XMLMessages.P78_NOT_WELLFORMED);
937 }
938 break;
939 default:
940 throw new RuntimeException("FWK001 2] ScannerState="+fScannerState+"\n" + "2\t"+fScannerState);
941 }
942 if (!moreToFollow) {
943 reportFatalXMLError(XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
944 XMLMessages.P1_ELEMENT_REQUIRED);
945 fDispatcher = new EndOfInputDispatcher();
946 setScannerState(SCANNER_STATE_END_OF_INPUT);
947 }
948 }
949 }
950 int fCurrentElementType = -1;
951 public int getCurrentElementType() {
952 return fCurrentElementType;
953 }
/**
 * Scanner dispatcher for the root element and everything nested inside it.
 * It tracks the open elements on a small int stack and mirrors the top of
 * that stack in the enclosing scanner's fCurrentElementType. When the
 * element depth drops back to zero (or scanElement() reports that the root
 * element has no further content) it installs a TrailingMiscDispatcher.
 */
954 final class ContentDispatcher implements ScannerDispatcher {
// Value of fReaderId captured when the root element name was scanned; used
// below to choose between the XMLDecl and TextDecl "must be first" errors.
955 private int fContentReader = -1;
// Number of entries currently in use on fElementTypeStack.
956 private int fElementDepth = 0;
// rawname of each open element, outermost first; doubled in size when full.
957 private int[] fElementTypeStack = new int[8];
958
/**
 * Pops the innermost element type and refreshes fCurrentElementType
 * (-1 once the stack is empty).
 *
 * NOTE(review): dispatch() repeats this same logic inline instead of
 * calling this method; no caller is visible inside this class.
 *
 * @throws RuntimeException if the stack is already empty
 */
959 void popElementType() {
960 if (fElementDepth-- == 0) {
961 throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
962 }
963 if (fElementDepth == 0) {
964 fCurrentElementType = - 1;
965 } else {
966 fCurrentElementType = fElementTypeStack[fElementDepth - 1];
967 }
968 }
969
/**
 * Runs the content state machine, switching on fScannerState.
 *
 * @param keepgoing when false, a single step is performed per call
 * @return true on every path visible here, including the early returns
 *         taken when the dispatcher is replaced
 */
970 public boolean dispatch(boolean keepgoing) throws Exception {
971 do {
972 switch (fScannerState) {
// The '<' of the root element has already been consumed by the prolog
// dispatcher; scan the element name and the rest of the start tag.
973 case SCANNER_STATE_ROOT_ELEMENT:
974 {
975 scanElementType(fEntityReader, '>', fElementQName);
976 if (fElementQName.rawname != -1) {
977 //
978 // root element
979 //
980 fContentReader = fReaderId;
981 fSeenRootElement = true;
982 //
983 // scan element
984 //
// Fast path: the name is immediately followed by '>', so there are no
// attributes to scan.
985 if (fEntityReader.lookingAtChar('>', true)) {
986 //
987 // we have more content
988 //
989 fEventHandler.callStartElement(fElementQName);
990 fScannerMarkupDepth--;
// Push the element type, growing the stack first if it is full.
991 if (fElementDepth == fElementTypeStack.length) {
992 int[] newStack = new int[fElementDepth * 2];
993 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
994 fElementTypeStack = newStack;
995 }
996 fCurrentElementType = fElementQName.rawname;
997 fElementTypeStack[fElementDepth] = fElementQName.rawname;
998 fElementDepth++;
999 restoreScannerState(SCANNER_STATE_CONTENT);
1000 } else if (scanElement(fElementQName)) {
1001 //
1002 // we have more content
1003 //
1004 if (fElementDepth == fElementTypeStack.length) {
1005 int[] newStack = new int[fElementDepth * 2];
1006 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1007 fElementTypeStack = newStack;
1008 }
1009 fCurrentElementType = fElementQName.rawname;
1010 fElementTypeStack[fElementDepth] = fElementQName.rawname;
1011 fElementDepth++;
1012 restoreScannerState(SCANNER_STATE_CONTENT);
1013 } else {
// scanElement() returned false: nothing is pushed and scanning moves on
// to the trailing-misc section.
1014 fDispatcher = new TrailingMiscDispatcher();
1015 restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1016 return true;
1017 }
1018 } else {
// No element name could be scanned: report it and go back to the prolog.
1019 reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
1020 XMLMessages.P22_NOT_RECOGNIZED);
1021 fDispatcher = new PrologDispatcher();
1022 restoreScannerState(SCANNER_STATE_PROLOG);
1023 return true;
1024 }
1025 break;
1026 }
// A '<' has been consumed (see the fParseTextDecl check in the CONTENT
// case, or CONTENT_RESULT_MARKUP_END_OF_INPUT); classify the markup here.
1027 case SCANNER_STATE_START_OF_MARKUP:
1028 if (fEntityReader.lookingAtChar('?', true)) {
1029 int piTarget = fEntityReader.scanName(' ');
1030 if (piTarget == -1) {
1031 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1032 XMLMessages.P16_PITARGET_REQUIRED);
1033 } else if ("xml".equals(fStringPool.toString(piTarget))) {
1034 if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
// A text declaration is accepted only while fParseTextDecl is still set.
1035 if (fParseTextDecl) {
1036 scanXMLDeclOrTextDecl(true);
1037 fParseTextDecl = false;
1038 } else {
1039 abortMarkup(XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1040 XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1041 }
1042 } else { // a PI target matching 'xml'
1043 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1044 XMLMessages.P17_RESERVED_PITARGET);
1045 }
1046 } else { // PI
1047 scanPI(piTarget);
1048 }
// NOTE(review): the state is restored again after this if/else chain.
1049 restoreScannerState(SCANNER_STATE_CONTENT);
1050 } else if (fEntityReader.lookingAtChar('!', true)) {
1051 if (fEntityReader.lookingAtChar('-', true)) { // comment ?
1052 if (fEntityReader.lookingAtChar('-', true)) {
1053 scanComment(); // scan through the closing '-->'
1054 } else {
1055 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1056 XMLMessages.P43_NOT_RECOGNIZED);
1057 }
1058 } else {
1059 if (fEntityReader.skippedString(cdata_string)) {
1060 fEntityReader.setInCDSect(true);
1061 fEventHandler.callStartCDATA();
1062 } else {
1063 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1064 XMLMessages.P43_NOT_RECOGNIZED);
1065 }
1066 }
1067 } else {
1068 if (fEntityReader.lookingAtChar('/', true)) {
1069 //
1070 // [42] ETag ::= '</' Name S? '>'
1071 //
// The end tag must name the element currently on top of the stack.
1072 if (!scanExpectedElementType(fEntityReader, '>', fCurrentElementType)) {
1073 abortMarkup(XMLMessages.MSG_ETAG_REQUIRED,
1074 XMLMessages.P39_UNTERMINATED,
1075 fCurrentElementType);
1076 } else {
1077 if (!fEntityReader.lookingAtChar('>', true)) {
1078 fEntityReader.skipPastSpaces();
1079 if (!fEntityReader.lookingAtChar('>', true)) {
1080 reportFatalXMLError(XMLMessages.MSG_ETAG_UNTERMINATED,
1081 XMLMessages.P42_UNTERMINATED,
1082 fCurrentElementType);
1083 }
1084 }
1085 fScannerMarkupDepth--;
1086 fEventHandler.callEndElement(fReaderId);
// Inline pop of the element stack (same logic as popElementType()).
1087 if (fElementDepth-- == 0) {
1088 throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1089 }
1090 if (fElementDepth == 0) {
// The root element has just been closed.
1091 fCurrentElementType = - 1;
1092 fDispatcher = new TrailingMiscDispatcher();
1093 restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1094 return true;
1095 } else {
1096 fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1097 }
1098 }
1099 } else {
1100 scanElementType(fEntityReader, '>', fElementQName);
1101 if (fElementQName.rawname != -1) {
1102 //
1103 // element
1104 //
1105 if (fEntityReader.lookingAtChar('>', true)) {
1106 fEventHandler.callStartElement(fElementQName);
1107 fScannerMarkupDepth--;
1108 if (fElementDepth == fElementTypeStack.length) {
1109 int[] newStack = new int[fElementDepth * 2];
1110 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1111 fElementTypeStack = newStack;
1112 }
1113 fCurrentElementType = fElementQName.rawname;
1114 fElementTypeStack[fElementDepth] = fElementQName.rawname;
1115 fElementDepth++;
1116 } else {
// The element type is pushed only when scanElement() returns true.
1117 if (scanElement(fElementQName)) {
1118 if (fElementDepth == fElementTypeStack.length) {
1119 int[] newStack = new int[fElementDepth * 2];
1120 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1121 fElementTypeStack = newStack;
1122 }
1123 fCurrentElementType = fElementQName.rawname;
1124 fElementTypeStack[fElementDepth] = fElementQName.rawname;
1125 fElementDepth++;
1126 }
1127 }
1128 } else {
1129 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1130 XMLMessages.P43_NOT_RECOGNIZED);
1131 }
1132 }
1133 }
1134 restoreScannerState(SCANNER_STATE_CONTENT);
1135 break;
1136 case SCANNER_STATE_CONTENT:
// While a text declaration may still appear, a leading '<' is routed
// through START_OF_MARKUP so that "<?xml " can be recognized there.
1137 if (fParseTextDecl && fEntityReader.lookingAtChar('<', true)) {
1138 fScannerMarkupDepth++;
1139 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1140 continue;
1141 }
1142 // REVISIT: Is this the right thing to do? Do we need to
1143 // save more information on the stack?
1144 fCurrentElementQName.setValues(-1, -1, fCurrentElementType);
// scanContent() consumes content and returns a code saying what stopped it.
1145 switch (fEntityReader.scanContent(fCurrentElementQName)) {
1146 case XMLEntityHandler.CONTENT_RESULT_START_OF_PI:
1147 fScannerMarkupDepth++;
1148 int piTarget = fEntityReader.scanName(' ');
1149 if (piTarget == -1) {
1150 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1151 XMLMessages.P16_PITARGET_REQUIRED);
1152 } else if ("xml".equals(fStringPool.toString(piTarget))) {
1153 if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
// Same reader as the root element: report a misplaced XMLDecl; any other
// reader: report a misplaced TextDecl.
1154 if (fReaderId == fContentReader) {
1155 abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1156 XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1157 } else {
1158 abortMarkup(XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1159 XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1160 }
1161 } else { // a PI target matching 'xml'
1162 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1163 XMLMessages.P17_RESERVED_PITARGET);
1164 }
1165 } else { // PI
1166 scanPI(piTarget);
1167 }
1168 break;
1169 case XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT:
1170 fScannerMarkupDepth++;
1171 fParseTextDecl = false;
1172 scanComment(); // scan through the closing '-->'
1173 break;
1174 case XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT:
// The markup depth stays raised until CONTENT_RESULT_END_OF_CDSECT.
1175 fScannerMarkupDepth++;
1176 fParseTextDecl = false;
1177 fEntityReader.setInCDSect(true);
1178 fEventHandler.callStartCDATA();
1179 break;
1180 case XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG:
1181 fScannerMarkupDepth++;
1182 fParseTextDecl = false;
1183 //
1184 // [42] ETag ::= '</' Name S? '>'
1185 //
1186 if (!scanExpectedElementType(fEntityReader, '>', fCurrentElementType)) {
1187 abortMarkup(XMLMessages.MSG_ETAG_REQUIRED,
1188 XMLMessages.P39_UNTERMINATED,
1189 fCurrentElementType);
1190 } else {
1191 if (!fEntityReader.lookingAtChar('>', true)) {
1192 fEntityReader.skipPastSpaces();
1193 if (!fEntityReader.lookingAtChar('>', true)) {
1194 reportFatalXMLError(XMLMessages.MSG_ETAG_UNTERMINATED,
1195 XMLMessages.P42_UNTERMINATED,
1196 fCurrentElementType);
1197 }
1198 }
1199 fScannerMarkupDepth--;
1200 fEventHandler.callEndElement(fReaderId);
// Inline pop of the element stack (same logic as popElementType()).
1201 if (fElementDepth-- == 0) {
1202 throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1203 }
1204 if (fElementDepth == 0) {
1205 fCurrentElementType = - 1;
1206 fDispatcher = new TrailingMiscDispatcher();
1207 restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1208 return true;
1209 } else {
1210 fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1211 }
1212 }
1213 restoreScannerState(SCANNER_STATE_CONTENT);
1214 break;
1215 case XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT:
1216 {
1217 fScannerMarkupDepth++;
1218 fParseTextDecl = false;
1219 scanElementType(fEntityReader, '>', fElementQName);
1220 if (fElementQName.rawname != -1) {
1221 if (fEntityReader.lookingAtChar('>', true)) {
1222 fEventHandler.callStartElement(fElementQName);
1223 fScannerMarkupDepth--;
1224 if (fElementDepth == fElementTypeStack.length) {
1225 int[] newStack = new int[fElementDepth * 2];
1226 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1227 fElementTypeStack = newStack;
1228 }
1229 fCurrentElementType = fElementQName.rawname;
1230 fElementTypeStack[fElementDepth] = fElementQName.rawname;
1231 fElementDepth++;
1232 } else {
1233 if (scanElement(fElementQName)) {
1234 if (fElementDepth == fElementTypeStack.length) {
1235 int[] newStack = new int[fElementDepth * 2];
1236 System.arraycopy(fElementTypeStack, 0, newStack, 0, fElementDepth);
1237 fElementTypeStack = newStack;
1238 }
1239 fCurrentElementType = fElementQName.rawname;
1240 fElementTypeStack[fElementDepth] = fElementQName.rawname;
1241 fElementDepth++;
1242 }
1243 }
1244 } else {
1245 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1246 XMLMessages.P43_NOT_RECOGNIZED);
1247 }
// Direct assignment rather than restoreScannerState(); an END_OF_INPUT
// state reached during the scan is left untouched.
1248 if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1249 fScannerState = SCANNER_STATE_CONTENT;
1250 break;
1251 }
1252 case XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG:
1253 {
// No end-tag scanning is done here; the element is ended and popped at once.
1254 fParseTextDecl = false;
1255 fEventHandler.callEndElement(fReaderId);
1256 if (fElementDepth-- == 0) {
1257 throw new RuntimeException("FWK002 popElementType: fElementDepth-- == 0.");
1258 }
1259 if (fElementDepth == 0) {
1260 fCurrentElementType = - 1;
1261 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1262 fDispatcher = new TrailingMiscDispatcher();
1263 fScannerState = SCANNER_STATE_TRAILING_MISC;
1264 }
1265 return true;
1266 } else {
1267 fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1268 }
1269 if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1270 fScannerState = SCANNER_STATE_CONTENT;
1271 break;
1272 }
1273 case XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF:
1274 fParseTextDecl = false;
1275 //
1276 // [67] Reference ::= EntityRef | CharRef
1277 // [68] EntityRef ::= '&' Name ';'
1278 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1279 //
1280 setScannerState(SCANNER_STATE_REFERENCE);
1281 int num = scanCharRef();
1282 // if (num == -1) num = 0xfffd; // REVISIT - alternative is to use Unicode replacement char
// A result of -1 from scanCharRef() produces no character event.
1283 if (num != -1)
1284 fEventHandler.callCharacters(num);
1285 restoreScannerState(SCANNER_STATE_CONTENT);
1286 break;
1287 case XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT:
1288 // REVISIT - This should hopefully get us the "reference not
1289 // contained in one entity" error when endOfInput is called.
1290 // Test that this is so...
1291 //
1292 // fall through...
1293 //
1294 case XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF:
1295 fParseTextDecl = false;
1296 //
1297 // [68] EntityRef ::= '&' Name ';'
1298 //
1299 setScannerState(SCANNER_STATE_REFERENCE);
// The name length is measured as the reader offset before and after
// skipPastName().
1300 int nameOffset = fEntityReader.currentOffset();
1301 fEntityReader.skipPastName(';');
1302 int nameLength = fEntityReader.currentOffset() - nameOffset;
1303 if (nameLength == 0) {
1304 reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
1305 XMLMessages.P68_NAME_REQUIRED);
1306 restoreScannerState(SCANNER_STATE_CONTENT);
1307 } else if (!fEntityReader.lookingAtChar(';', true)) {
1308 reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
1309 XMLMessages.P68_SEMICOLON_REQUIRED,
1310 fEntityReader.addString(nameOffset, nameLength));
1311 restoreScannerState(SCANNER_STATE_CONTENT);
1312 } else {
1313 restoreScannerState(SCANNER_STATE_CONTENT);
1314 int entityName = fEntityReader.addSymbol(nameOffset, nameLength);
// The boolean returned by the entity handler becomes the new fParseTextDecl.
1315 fParseTextDecl = fEntityHandler.startReadingFromEntity(entityName, fElementDepth, XMLEntityHandler.ENTITYREF_IN_CONTENT);
1316 }
1317 break;
1318 case XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT:
1319 fParseTextDecl = false;
1320 //
1321 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1322 // [21] CDEnd ::= ']]>'
1323 //
// ']]>' closes a CDATA section if one is open; otherwise it is an error.
1324 if (fEntityReader.getInCDSect()) {
1325 fEntityReader.setInCDSect(false);
1326 fEventHandler.callEndCDATA();
1327 fScannerMarkupDepth--;
1328 } else {
1329 reportFatalXMLError(XMLMessages.MSG_CDEND_IN_CONTENT,
1330 XMLMessages.P14_INVALID);
1331 }
1332 restoreScannerState(SCANNER_STATE_CONTENT);
1333 break;
1334 case XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR:
1335 fParseTextDecl = false;
1336 //
1337 // The reader will also use this state if it
1338 // encounters the end of input while reading
1339 // content. We need to check for this case.
1340 //
1341 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1342 if (!fEntityReader.lookingAtValidChar(false)) {
1343 //
1344 // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] // any Unicode character, excluding the
1345 // | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
1346 //
1347 int invChar = fEntityReader.scanInvalidChar();
// The state is checked again after scanInvalidChar().
1348 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1349 if (invChar >= 0) {
1350 if (fEntityReader.getInCDSect()) {
1351 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_CDSECT,
1352 XMLMessages.P20_INVALID_CHARACTER,
1353 Integer.toHexString(invChar));
1354 } else {
1355 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_CONTENT,
1356 XMLMessages.P43_INVALID_CHARACTER,
1357 Integer.toHexString(invChar));
1358 }
1359 }
1360 }
1361 }
1362 restoreScannerState(SCANNER_STATE_CONTENT);
1363 }
1364 break;
1365 case XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED:
1366 fParseTextDecl = false;
1367 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1368 XMLMessages.P43_NOT_RECOGNIZED);
1369 break;
1370 case XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT:
1371 // REVISIT - This should hopefully get us the "markup not
1372 // contained in one entity" error when endOfInput is called.
1373 // Test that this is so...
1374 fScannerMarkupDepth++;
1375 fParseTextDecl = false;
1376 fScannerState = SCANNER_STATE_START_OF_MARKUP;
1377 break;
1378 default:
1379 throw new RuntimeException("FWK001 3] ScannerState="+fScannerState+"\n" + "3\t"+fScannerState); // should not happen
1380 }
1381 break;
1382 default:
1383 throw new RuntimeException("FWK001 4] ScannerState="+fScannerState+"\n" + "4\t"+fScannerState);
1384 }
1385 } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
1386 return true;
1387 }
/**
 * Called when the current entity runs out while this dispatcher is active.
 * Reports whatever construct was left unfinished according to
 * fScannerState. When no more input follows, also reports a missing end
 * tag if elements are still open and switches to the EndOfInputDispatcher.
 *
 * @param entityName   not used by this implementation
 * @param moreToFollow true when more input follows the exhausted entity
 * @throws RuntimeException if fScannerState is not one of the handled states
 */
1388 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1389 switch (fScannerState) {
1390 case SCANNER_STATE_ROOT_ELEMENT:
1391 case SCANNER_STATE_START_OF_MARKUP:
1392 break;
1393 case SCANNER_STATE_CONTENT:
1394 if (fEntityReader.getInCDSect()) {
1395 reportFatalXMLError(XMLMessages.MSG_CDSECT_UNTERMINATED,
1396 XMLMessages.P18_UNTERMINATED);
1397 }
1398 break;
// The next two states currently report nothing; the error calls are
// commented out pending a REVISIT.
1399 case SCANNER_STATE_ATTRIBUTE_LIST:
1400 if (!moreToFollow) {
1401// REVISIT reportFatalXMLError(XMLMessages.MSG_TAG1);
1402 } else {
1403// REVISIT reportFatalXMLError(XMLMessages.MSG_TAG1);
1404 }
1405 break;
1406 case SCANNER_STATE_ATTRIBUTE_NAME:
1407 if (!moreToFollow) {
1408// REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1409 } else {
1410// REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1411 }
1412 break;
1413 case SCANNER_STATE_ATTRIBUTE_VALUE:
1414 if (!moreToFollow) {
1415 reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_VALUE_UNTERMINATED,
1416 XMLMessages.P10_UNTERMINATED,
1417 fAttValueElementType,
1418 fAttValueAttrName);
1419 } else if (fReaderId == fAttValueReader) {
1420// REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1421 } else {
// A different reader than the one recorded in fAttValueReader ran out:
// append the pending part of the attribute value to fLiteralData.
1422 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
1423 }
1424 break;
1425 case SCANNER_STATE_COMMENT:
1426 if (!moreToFollow) {
1427 reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
1428 XMLMessages.P15_UNTERMINATED);
1429 } else {
1430 reportFatalXMLError(XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
1431 XMLMessages.P78_NOT_WELLFORMED);
1432 }
1433 break;
1434 case SCANNER_STATE_PI:
1435 if (!moreToFollow) {
1436 reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
1437 XMLMessages.P16_UNTERMINATED);
1438 } else {
1439 reportFatalXMLError(XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
1440 XMLMessages.P78_NOT_WELLFORMED);
1441 }
1442 break;
1443 case SCANNER_STATE_REFERENCE:
1444 if (!moreToFollow) {
1445 reportFatalXMLError(XMLMessages.MSG_REFERENCE_UNTERMINATED,
1446 XMLMessages.P67_UNTERMINATED);
1447 } else {
1448 reportFatalXMLError(XMLMessages.MSG_REFERENCE_NOT_IN_ONE_ENTITY,
1449 XMLMessages.P78_NOT_WELLFORMED);
1450 }
1451 break;
1452 default:
1453 throw new RuntimeException("FWK001 5] ScannerState="+fScannerState+"\n" + "5\t"+fScannerState);
1454 }
1455 if (!moreToFollow) {
// Input ended with elements still open: the innermost one lacks its end tag.
1456 if (fElementDepth > 0)
1457 reportFatalXMLError(XMLMessages.MSG_ETAG_REQUIRED,
1458 XMLMessages.P39_UNTERMINATED,
1459 fCurrentElementType);
1460 fDispatcher = new EndOfInputDispatcher();
1461 setScannerState(SCANNER_STATE_END_OF_INPUT);
1462 }
1463 }
1464 }
1465 final class TrailingMiscDispatcher implements ScannerDispatcher {
1466 public boolean dispatch(boolean keepgoing) throws Exception {
1467 do {
1468 if (fEntityReader.lookingAtChar('<', true)) {
1469 fScannerMarkupDepth++;
1470 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1471 if (fEntityReader.lookingAtChar('?', true)) {
1472 int piTarget = fEntityReader.scanName(' ');
1473 if (piTarget == -1) {
1474 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
1475 XMLMessages.P16_PITARGET_REQUIRED);
1476 } else if ("xml".equals(fStringPool.toString(piTarget))) {
1477 if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1478 abortMarkup(XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1479 XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1480 } else { // a PI target matching 'xml'
1481 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1482 XMLMessages.P17_RESERVED_PITARGET);
1483 }
1484 } else { // PI
1485 scanPI(piTarget);
1486 }
1487 } else if (fEntityReader.lookingAtChar('!', true)) {
1488 if (fEntityReader.lookingAtChar('-', true) &&
1489 fEntityReader.lookingAtChar('-', true)) { // comment ?
1490 scanComment(); // scan through the closing '-->'
1491 } else {
1492 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1493 XMLMessages.P27_NOT_RECOGNIZED);
1494 }
1495 } else {
1496 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1497 XMLMessages.P27_NOT_RECOGNIZED);
1498 }
1499 restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1500 } else if (fEntityReader.lookingAtSpace(true)) {
1501 fEntityReader.skipPastSpaces();
1502 } else if (!fEntityReader.lookingAtValidChar(false)) {
1503 int invChar = fEntityReader.scanInvalidChar();
1504 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1505 if (invChar >= 0) {
1506 String arg = Integer.toHexString(invChar);
1507 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_MISC,
1508 XMLMessages.P27_INVALID_CHARACTER,
1509 arg);
1510 }
1511 }
1512 } else {
1513 reportFatalXMLError(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1514 XMLMessages.P27_NOT_RECOGNIZED);
1515 fEntityReader.lookingAtValidChar(true);
1516 }
1517 } while (fScannerState != SCANNER_STATE_END_OF_INPUT && keepgoing);
1518 return true;
1519 }
1520 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1521 if (moreToFollow)
1522 throw new RuntimeException("FWK003 TrailingMiscDispatcher.endOfInput moreToFollow");
1523 switch (fScannerState) {
1524 case SCANNER_STATE_TRAILING_MISC:
1525 case SCANNER_STATE_START_OF_MARKUP:
1526 break;
1527 case SCANNER_STATE_COMMENT:
1528 reportFatalXMLError(XMLMessages.MSG_COMMENT_UNTERMINATED,
1529 XMLMessages.P15_UNTERMINATED);
1530 break;
1531 case SCANNER_STATE_PI:
1532 reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
1533 XMLMessages.P16_UNTERMINATED);
1534 break;
1535 default:
1536 throw new RuntimeException("FWK001 6] ScannerState="+fScannerState+"\n" + "6\t"+fScannerState);
1537 }
1538 fDispatcher = new EndOfInputDispatcher();
1539 setScannerState(SCANNER_STATE_END_OF_INPUT);
1540 }
1541 }
1542 final class EndOfInputDispatcher implements ScannerDispatcher {
1543 public boolean dispatch(boolean keepgoing) throws Exception {
1544 if (fScannerState != SCANNER_STATE_TERMINATED)
1545 fEventHandler.callEndDocument();
1546 setScannerState(SCANNER_STATE_TERMINATED);
1547 return false;
1548 }
1549 public void endOfInput(int entityName, boolean moreToFollow) throws Exception {
1550 throw new RuntimeException("FWK001 7] ScannerState="+fScannerState+"\n" + "7\t"+fScannerState);
1551 }
1552 }
1553 //
1554 // From the standard:
1555 //
1556 // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1557 // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
1558 // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1559 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1560 // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1561 // | ('"' ('yes' | 'no') '"'))
1562 //
1563 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1564 //
/**
 * Scans the pseudo-attributes of an XML declaration or a text declaration,
 * through the closing '?&gt;', and reports the result to the event handler.
 * The caller visible in this file invokes it after the target "xml" and one
 * following white space character have already been consumed.
 *
 * On success fScannerMarkupDepth is decremented and either callTextDecl()
 * or callXMLDecl() is invoked with the values returned by
 * scanStringLiteral() for each pseudo-attribute (-1 for any that was
 * absent). On a syntax error the markup is aborted and the method returns
 * without calling the handler.
 *
 * @param scanningTextDecl true to apply the TextDecl rules (version
 *        optional, encoding required, no standalone); false to apply the
 *        XMLDecl rules (version required, then optional encoding and
 *        standalone)
 */
1565 void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws Exception
1566 {
// Literal results for each pseudo-attribute; -1 means "not present".
1567 int version = -1;
1568 int encoding = -1;
1569 int standalone = -1;
// 'state' records which pseudo-attribute was recognized most recently.
1570 final int XMLDECL_START = 0;
1571 final int XMLDECL_VERSION = 1;
1572 final int XMLDECL_ENCODING = 2;
1573 final int XMLDECL_STANDALONE = 3;
1574 final int XMLDECL_FINISHED = 4;
1575 int state = XMLDECL_START;
1576 do {
1577 fEntityReader.skipPastSpaces();
// Remember where the keyword starts so it can be quoted in error messages.
1578 int offset = fEntityReader.currentOffset();
1579 if (scanningTextDecl) {
// TextDecl: 'version' is accepted only as the first item; otherwise
// 'encoding' is required.
1580 if (state == XMLDECL_START && fEntityReader.skippedString(version_string)) {
1581 state = XMLDECL_VERSION;
1582 } else if (fEntityReader.skippedString(encoding_string)) {
1583 state = XMLDECL_ENCODING;
1584 } else {
1585 abortMarkup(XMLMessages.MSG_ENCODINGDECL_REQUIRED,
1586 XMLMessages.P77_ENCODINGDECL_REQUIRED);
1587 return;
1588 }
1589 } else {
// XMLDecl: 'version' is mandatory and must come first.
1590 if (state == XMLDECL_START) {
1591 if (!fEntityReader.skippedString(version_string)) {
1592 abortMarkup(XMLMessages.MSG_VERSIONINFO_REQUIRED,
1593 XMLMessages.P23_VERSIONINFO_REQUIRED);
1594 return;
1595 }
1596 state = XMLDECL_VERSION;
1597 } else {
// After 'version' try 'encoding'; failing that, or after 'encoding',
// try 'standalone'.
1598 if (state == XMLDECL_VERSION) {
1599 if (fEntityReader.skippedString(encoding_string))
1600 state = XMLDECL_ENCODING;
1601 else
1602 state = XMLDECL_STANDALONE;
1603 } else
1604 state = XMLDECL_STANDALONE;
// No 'standalone' keyword either: leave the loop and expect '?>' next.
1605 if (state == XMLDECL_STANDALONE && !fEntityReader.skippedString(standalone_string))
1606 break;
1607 }
1608 }
// Length of the keyword that was just skipped.
1609 int length = fEntityReader.currentOffset() - offset;
1610 fEntityReader.skipPastSpaces();
1611 if (!fEntityReader.lookingAtChar('=', true)) {
1612 int majorCode = scanningTextDecl ?
1613 XMLMessages.MSG_EQ_REQUIRED_IN_TEXTDECL :
1614 XMLMessages.MSG_EQ_REQUIRED_IN_XMLDECL;
1615 int minorCode = state == XMLDECL_VERSION ?
1616 XMLMessages.P24_EQ_REQUIRED :
1617 (state == XMLDECL_ENCODING ?
1618 XMLMessages.P80_EQ_REQUIRED :
1619 XMLMessages.P32_EQ_REQUIRED);
1620 abortMarkup(majorCode, minorCode, fEntityReader.addString(offset, length));
1621 return;
1622 }
1623 fEntityReader.skipPastSpaces();
// The result is either one of the STRINGLIT_RESULT_* codes handled below
// or a value that fStringPool.toString() can turn into the literal text.
1624 int result = fEntityReader.scanStringLiteral();
1625 switch (result) {
1626 case XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED:
1627 {
1628 int majorCode = scanningTextDecl ?
1629 XMLMessages.MSG_QUOTE_REQUIRED_IN_TEXTDECL :
1630 XMLMessages.MSG_QUOTE_REQUIRED_IN_XMLDECL;
1631 int minorCode = state == XMLDECL_VERSION ?
1632 XMLMessages.P24_QUOTE_REQUIRED :
1633 (state == XMLDECL_ENCODING ?
1634 XMLMessages.P80_QUOTE_REQUIRED :
1635 XMLMessages.P32_QUOTE_REQUIRED);
1636 abortMarkup(majorCode, minorCode, fEntityReader.addString(offset, length));
1637 return;
1638 }
1639 case XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR:
1640 int invChar = fEntityReader.scanInvalidChar();
// Nothing is reported if the scanner is now in the END_OF_INPUT state.
1641 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1642 if (invChar >= 0) {
1643 int majorCode = scanningTextDecl ?
1644 XMLMessages.MSG_INVALID_CHAR_IN_TEXTDECL :
1645 XMLMessages.MSG_INVALID_CHAR_IN_XMLDECL;
1646 int minorCode = state == XMLDECL_VERSION ?
1647 XMLMessages.P26_INVALID_CHARACTER :
1648 (state == XMLDECL_ENCODING ?
1649 XMLMessages.P81_INVALID_CHARACTER :
1650 XMLMessages.P32_INVALID_CHARACTER);
1651 reportFatalXMLError(majorCode, minorCode, Integer.toHexString(invChar));
1652 }
1653 skipPastEndOfCurrentMarkup();
1654 }
1655 return;
1656 default:
1657 break;
1658 }
1659 switch (state) {
1660 case XMLDECL_VERSION:
1661 //
1662 // version="..."
1663 //
1664 version = result;
1665 String versionString = fStringPool.toString(version);
// Anything other than "1.0": a malformed number aborts the declaration, a
// well-formed one is only reported as a recoverable error.
1666 if (!"1.0".equals(versionString)) {
1667 if (!validVersionNum(versionString)) {
1668 abortMarkup(XMLMessages.MSG_VERSIONINFO_INVALID,
1669 XMLMessages.P26_INVALID_VALUE,
1670 versionString);
1671 return;
1672 }
1673 // NOTE: RECOVERABLE ERROR
1674 Object[] args = { versionString };
1675 fErrorReporter.reportError(fErrorReporter.getLocator(),
1676 XMLMessages.XML_DOMAIN,
1677 XMLMessages.MSG_VERSION_NOT_SUPPORTED,
1678 XMLMessages.P26_NOT_SUPPORTED,
1679 args,
1680 XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
1681 // REVISIT - hope it is compatible...
1682 // skipPastEndOfCurrentMarkup();
1683 // return;
1684 }
// No white space after the version literal: a TextDecl is in error (the
// required encoding cannot follow), an XMLDecl is finished.
1685 if (!fEntityReader.lookingAtSpace(true)) {
1686 if (scanningTextDecl) {
1687 abortMarkup(XMLMessages.MSG_SPACE_REQUIRED_IN_TEXTDECL,
1688 XMLMessages.P80_WHITESPACE_REQUIRED);
1689 return;
1690 }
1691 state = XMLDECL_FINISHED;
1692 }
1693 break;
1694 case XMLDECL_ENCODING:
1695 //
1696 // encoding = "..."
1697 //
1698 encoding = result;
1699 String encodingString = fStringPool.toString(encoding);
1700 if (!validEncName(encodingString)) {
1701 abortMarkup(XMLMessages.MSG_ENCODINGDECL_INVALID,
1702 XMLMessages.P81_INVALID_VALUE,
1703 encodingString);
1704 return;
1705 }
// A TextDecl always ends after 'encoding'. An XMLDecl ends here only if no
// white space follows; otherwise the loop runs again to try 'standalone'.
1706 if (!fEntityReader.lookingAtSpace(true)) {
1707 state = XMLDECL_FINISHED;
1708 } else if (scanningTextDecl) {
1709 fEntityReader.skipPastSpaces();
1710 state = XMLDECL_FINISHED;
1711 }
1712 break;
1713 case XMLDECL_STANDALONE:
1714 //
1715 // standalone="..."
1716 //
1717 standalone = result;
1718 String standaloneString = fStringPool.toString(standalone);
1719 boolean yes = "yes".equals(standaloneString);
1720 if (!yes && !"no".equals(standaloneString)) {
1721 abortMarkup(XMLMessages.MSG_SDDECL_INVALID,
1722 XMLMessages.P32_INVALID_VALUE,
1723 standaloneString);
1724 return;
1725 }
1726 fStandalone = yes;
1727 fEntityReader.skipPastSpaces();
1728 state = XMLDECL_FINISHED;
1729 break;
1730 }
1731 } while (state != XMLDECL_FINISHED);
// The declaration must now close with '?>'.
1732 if (!fEntityReader.lookingAtChar('?', true) || !fEntityReader.lookingAtChar('>', true)) {
1733 int majorCode, minorCode;
1734 if (scanningTextDecl) {
1735 majorCode = XMLMessages.MSG_TEXTDECL_UNTERMINATED;
1736 minorCode = XMLMessages.P77_UNTERMINATED;
1737 } else {
1738 majorCode = XMLMessages.MSG_XMLDECL_UNTERMINATED;
1739 minorCode = XMLMessages.P23_UNTERMINATED;
1740 }
1741 abortMarkup(majorCode, minorCode);
1742 return;
1743 }
1744 fScannerMarkupDepth--;
1745 if (scanningTextDecl) {
1746 fEventHandler.callTextDecl(version, encoding);
1747 } else {
1748 //
1749 // Now that we have hit '?>' we are done with XML decl. Call the
1750 // handler before returning.
1751 //
1752 fEventHandler.callXMLDecl(version, encoding, standalone);
1753 // if we see standalone = 'yes', call the eventHandler - XMLValidator
1754 if (fStandalone) {
1755 fEventHandler.callStandaloneIsYes();
1756 }
1757 }
1758 }
1759 //
1760 // From the standard:
1761 //
1762 // [39] element ::= EmptyElemTag | STag content ETag
1763 // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1764 // [40] STag ::= '<' Name (S Attribute)* S? '>'
1765 // [41] Attribute ::= Name Eq AttValue
1766 // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1767 // [67] Reference ::= EntityRef | CharRef
1768 // [68] EntityRef ::= '&' Name ';'
1769 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1770 // [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1771 // [42] ETag ::= '</' Name S? '>'
1772 //
1773 // Note: We have already scanned Name.
1774 //
1775 boolean scanElement(QName element) throws Exception
1776 {
1777 //
1778 // Scan for attributes
1779 //
1780 boolean greater = false;
1781 boolean slash = false;
1782 if (greater = fEntityReader.lookingAtChar('>', true)) {
1783 // no attributes
1784 } else if (fEntityReader.lookingAtSpace(true)) {
1785 int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
1786 while (true) {
1787 fEntityReader.skipPastSpaces();
1788 //
1789 // [41] Attribute ::= Name Eq AttValue
1790 //
1791 if ((greater = fEntityReader.lookingAtChar('>', true)) || (slash = fEntityReader.lookingAtChar('/', true)))
1792 break;
1793 //
1794 // Name
1795 //
1796 setScannerState(SCANNER_STATE_ATTRIBUTE_NAME);
1797 scanAttributeName(fEntityReader, element, fAttributeQName);
1798 if (fAttributeQName.rawname == -1) {
1799 break;
1800 }
1801 //
1802 // Eq
1803 //
1804 fEntityReader.skipPastSpaces();
1805 if (!fEntityReader.lookingAtChar('=', true)) {
1806 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1807 abortMarkup(XMLMessages.MSG_EQ_REQUIRED_IN_ATTRIBUTE,
1808 XMLMessages.P41_EQ_REQUIRED,
1809 element.rawname, fAttributeQName.rawname);
1810 restoreScannerState(previousState);
1811 }
1812 return false;
1813 }
1814 fEntityReader.skipPastSpaces();
1815 int result = scanAttValue(element, fAttributeQName, false);
1816 if (result == RESULT_FAILURE) {
1817 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1818 skipPastEndOfCurrentMarkup();
1819 restoreScannerState(previousState);
1820 }
1821 return false;
1822 } else if (result == RESULT_DUPLICATE_ATTR) {
1823 reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1824 XMLMessages.WFC_UNIQUE_ATT_SPEC,
1825 element.rawname, fAttributeQName.rawname);
1826 }
1827 //The validator will check whether we have a duplicate attr in the start tag.
1828 if ( fEventHandler.attribute(element, fAttributeQName, result) ) {
1829 reportFatalXMLError(XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1830 XMLMessages.WFC_UNIQUE_ATT_SPEC,
1831 element.rawname, fAttributeQName.rawname);
1832 }
1833 restoreScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
1834 if (!fEntityReader.lookingAtSpace(true)) {
1835 if (!(greater = fEntityReader.lookingAtChar('>', true)))
1836 slash = fEntityReader.lookingAtChar('/', true);
1837 break;
1838 }
1839 }
1840 restoreScannerState(previousState);
1841 } else {
1842 slash = fEntityReader.lookingAtChar('/', true);
1843 }
1844 if (!greater && (!slash || !fEntityReader.lookingAtChar('>', true))) { // '>' or '/>'
1845 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1846 abortMarkup(XMLMessages.MSG_ELEMENT_UNTERMINATED,
1847 XMLMessages.P40_UNTERMINATED,
1848 element.rawname);
1849 }
1850 return false;
1851 }
1852 fEventHandler.callStartElement(element);
1853 fScannerMarkupDepth--;
1854 if (slash) { // '/>'
1855 fEventHandler.callEndElement(fReaderId);
1856 return false;
1857 } else {
1858 return true;
1859 }
1860 }
1861 //
1862 // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1863 //
1864 int scanCharRef() throws Exception {
1865 int valueOffset = fEntityReader.currentOffset();
1866 boolean hex = fEntityReader.lookingAtChar('x', true);
1867 int num = fEntityReader.scanCharRef(hex);
1868 if (num < 0) {
1869 switch (num) {
1870 case XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED:
1871 reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_CHARREF,
1872 XMLMessages.P66_SEMICOLON_REQUIRED);
1873 return -1;
1874 case XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR:
1875 int majorCode = hex ? XMLMessages.MSG_HEXDIGIT_REQUIRED_IN_CHARREF :
1876 XMLMessages.MSG_DIGIT_REQUIRED_IN_CHARREF;
1877 int minorCode = hex ? XMLMessages.P66_HEXDIGIT_REQUIRED :
1878 XMLMessages.P66_DIGIT_REQUIRED;
1879 reportFatalXMLError(majorCode, minorCode);
1880 return -1;
1881 case XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE:
1882 num = 0x110000; // this will cause the right error to be reported below...
1883 break;
1884 }
1885 }
1886 //
1887 // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] // any Unicode character, excluding the
1888 // | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
1889 //
1890 if (num < 0x20) {
1891 if (num == 0x09 || num == 0x0A || num == 0x0D) {
1892 return num;
1893 }
1894 } else if (num <= 0xD7FF || (num >= 0xE000 && (num <= 0xFFFD || (num >= 0x10000 && num <= 0x10FFFF)))) {
1895 return num;
1896 }
1897 int valueLength = fEntityReader.currentOffset() - valueOffset;
1898 reportFatalXMLError(XMLMessages.MSG_INVALID_CHARREF,
1899 XMLMessages.WFC_LEGAL_CHARACTER,
1900 fEntityReader.addString(valueOffset, valueLength));
1901 return -1;
1902 }
1903 //
1904 // From the standard:
1905 //
1906 // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1907 //
1908 // Called after scanning past '<!--'
1909 //
1910 void scanComment() throws Exception
1911 {
1912 int commentOffset = fEntityReader.currentOffset();
1913 boolean sawDashDash = false;
1914 int previousState = setScannerState(SCANNER_STATE_COMMENT);
1915 while (fScannerState == SCANNER_STATE_COMMENT) {
1916 if (fEntityReader.lookingAtChar('-', false)) {
1917 int nextEndOffset = fEntityReader.currentOffset();
1918 int endOffset = 0;
1919 fEntityReader.lookingAtChar('-', true);
1920 int offset = fEntityReader.currentOffset();
1921 int count = 1;
1922 while (fEntityReader.lookingAtChar('-', true)) {
1923 count++;
1924 endOffset = nextEndOffset;
1925 nextEndOffset = offset;
1926 offset = fEntityReader.currentOffset();
1927 }
1928 if (count > 1) {
1929 if (fEntityReader.lookingAtChar('>', true)) {
1930 if (!sawDashDash && count > 2) {
1931 reportFatalXMLError(XMLMessages.MSG_DASH_DASH_IN_COMMENT,
1932 XMLMessages.P15_DASH_DASH);
1933 sawDashDash = true;
1934 }
1935 fScannerMarkupDepth--;
1936 fEventHandler.callComment(fEntityReader.addString(commentOffset, endOffset - commentOffset));
1937 restoreScannerState(previousState);
1938 return;
1939 } else if (!sawDashDash) {
1940 reportFatalXMLError(XMLMessages.MSG_DASH_DASH_IN_COMMENT,
1941 XMLMessages.P15_DASH_DASH);
1942 sawDashDash = true;
1943 }
1944 }
1945 } else {
1946 if (!fEntityReader.lookingAtValidChar(true)) {
1947 int invChar = fEntityReader.scanInvalidChar();
1948 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1949 if (invChar >= 0) {
1950 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_COMMENT,
1951 XMLMessages.P15_INVALID_CHARACTER,
1952 Integer.toHexString(invChar));
1953 }
1954 }
1955 }
1956 }
1957 }
1958 restoreScannerState(previousState);
1959 }
1960 //
1961 // From the standard:
1962 //
1963 // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1964 // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1965 //
1966 void scanPI(int piTarget) throws Exception
1967 {
1968 String piTargetString = fStringPool.toString(piTarget);
1969 if (piTargetString.length() == 3 &&
1970 (piTargetString.charAt(0) == 'X' || piTargetString.charAt(0) == 'x') &&
1971 (piTargetString.charAt(1) == 'M' || piTargetString.charAt(1) == 'm') &&
1972 (piTargetString.charAt(2) == 'L' || piTargetString.charAt(2) == 'l')) {
1973 abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
1974 XMLMessages.P17_RESERVED_PITARGET);
1975 return;
1976 }
1977 int prevState = setScannerState(SCANNER_STATE_PI);
1978 int piDataOffset = -1;
1979 int piDataLength = -1;
1980 if (!fEntityReader.lookingAtSpace(true)) {
1981 if (!fEntityReader.lookingAtChar('?', true) || !fEntityReader.lookingAtChar('>', true)) {
1982 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1983 abortMarkup(XMLMessages.MSG_SPACE_REQUIRED_IN_PI,
1984 XMLMessages.P16_WHITESPACE_REQUIRED);
1985 restoreScannerState(prevState);
1986 }
1987 return;
1988 }
1989 piDataLength = 0;
1990 } else {
1991 fEntityReader.skipPastSpaces();
1992 piDataOffset = fEntityReader.currentOffset();
1993 while (fScannerState == SCANNER_STATE_PI) {
1994 while (fEntityReader.lookingAtChar('?', false)) {
1995 int offset = fEntityReader.currentOffset();
1996 fEntityReader.lookingAtChar('?', true);
1997 if (fEntityReader.lookingAtChar('>', true)) {
1998 piDataLength = offset - piDataOffset;
1999 break;
2000 }
2001 }
2002 if (piDataLength >= 0)
2003 break;
2004 if (!fEntityReader.lookingAtValidChar(true)) {
2005 int invChar = fEntityReader.scanInvalidChar();
2006 if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2007 if (invChar >= 0) {
2008 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_PI,
2009 XMLMessages.P16_INVALID_CHARACTER,
2010 Integer.toHexString(invChar));
2011 }
2012 skipPastEndOfCurrentMarkup();
2013 restoreScannerState(prevState);
2014 }
2015 return;
2016 }
2017 }
2018 }
2019 fScannerMarkupDepth--;
2020 restoreScannerState(prevState);
2021 int piData = piDataLength == 0 ?
2022 StringPool.EMPTY_STRING : fEntityReader.addString(piDataOffset, piDataLength);
2023 fEventHandler.callProcessingInstruction(piTarget, piData);
2024 }
2025
2026 /** Sets whether the parser preprocesses namespaces. */
2027 public void setNamespacesEnabled(boolean enabled) {
2028 fNamespacesEnabled = enabled;
2029 }
2030
2031 /** Returns whether the parser processes namespaces. */
2032 public boolean getNamespacesEnabled() {
2033 return fNamespacesEnabled;
2034 }
2035
2036 /** Sets whether the parser validates. */
2037 public void setValidationEnabled(boolean enabled) {
2038 fValidationEnabled = enabled;
2039 if (fDTDScanner != null) {
2040 fDTDScanner.setValidationEnabled(enabled);
2041 }
2042 }
2043
2044 /** Returns true if validation is turned on. */
2045 public boolean getValidationEnabled() {
2046 return fValidationEnabled;
2047 }
2048
2049 // old EventHandler methods pushed back into scanner
2050
2051 /** Scans element type. */
2052 private void scanElementType(XMLEntityHandler.EntityReader entityReader,
2053 char fastchar, QName element) throws Exception {
2054
2055 if (!fNamespacesEnabled) {
2056 element.clear();
2057 element.localpart = entityReader.scanName(fastchar);
2058 element.rawname = element.localpart;
2059 }
2060 else {
2061 entityReader.scanQName(fastchar, element);
2062 if (entityReader.lookingAtChar(':', false)) {
2063 fErrorReporter.reportError(fErrorReporter.getLocator(),
2064 XMLMessages.XML_DOMAIN,
2065 XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2066 XMLMessages.P5_INVALID_CHARACTER,
2067 null,
2068 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
2069 entityReader.skipPastNmtoken(' ');
2070 }
2071 }
2072
2073 fEventHandler.element(element);
2074
2075 } // scanElementType(XMLEntityHandler.EntityReader,char,QName)
2076
2077 /** Scans expected element type. */
2078 private boolean scanExpectedElementType(XMLEntityHandler.EntityReader entityReader,
2079 char fastchar, int elementType)
2080 throws Exception {
2081
2082 /***/
2083 // REVISIT: Why aren't we using the 'element' parameter? -Ac
2084 // REVISIT: I replaced the 'fCurrentElement' with 'element' parameter, still working,
2085 // just wondering Why are we using CharArrayRange in the first place? -ericye
2086 if (fCurrentElementCharArrayRange == null) {
2087 fCurrentElementCharArrayRange = fStringPool.createCharArrayRange();
2088 }
2089 fStringPool.getCharArrayRange(elementType, fCurrentElementCharArrayRange);
2090 return entityReader.scanExpectedName(fastchar, fCurrentElementCharArrayRange);
2091 /***
2092 entityReader.scanQName(fastchar, element);
2093 return true;
2094 /***/
2095
2096 } // scanExpectedElementType(XMLEntityHandler.EntityReader,char,QName)
2097
2098 /** Scans attribute name. */
2099 private void scanAttributeName(XMLEntityHandler.EntityReader entityReader,
2100 QName element, QName attribute)
2101 throws Exception {
2102
2103 /***
2104 // REVISIT: What's this check for?
2105 if (!fSeenRootElement) {
2106 fSeenRootElement = true;
2107 rootElementSpecified(element);
2108 fStringPool.resetShuffleCount();
2109 }
2110 /***/
2111
2112 if (!fNamespacesEnabled) {
2113 attribute.clear();
2114 attribute.localpart = entityReader.scanName('=');
2115 attribute.rawname = attribute.localpart;
2116 }
2117 else {
2118 entityReader.scanQName('=', attribute);
2119 if (entityReader.lookingAtChar(':', false)) {
2120 fErrorReporter.reportError(fErrorReporter.getLocator(),
2121 XMLMessages.XML_DOMAIN,
2122 XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2123 XMLMessages.P5_INVALID_CHARACTER,
2124 null,
2125 XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
2126 entityReader.skipPastNmtoken(' ');
2127 }
2128 }
2129
2130 } // scanAttributeName(XMLEntityHandler.EntityReader,QName,QName)
2131
2132 /** Scan doctype declaration. */
2133 private void scanDoctypeDecl(boolean standalone) throws Exception {
2134
2135 fScanningDTD = true;
2136
2137 /***
2138 fScanningDTD = true;
2139 fCheckedForSchema = true;
2140 /***/
2141 fSeenDoctypeDecl = true;
2142 /***
2143 fStandaloneReader = standalone ? fEntityHandler.getReaderId() : -1;
2144 fDeclsAreExternal = false;
2145 if (fDTDImporter == null) {
2146 fDTDImporter = new DTDImporter(fStringPool, fErrorReporter, fEntityHandler, this);
2147 }
2148 else {
2149 fDTDImporter.reset(fStringPool);
2150 }
2151 fDTDImporter.initHandlers(fDTDHandler);
2152 fDTDImporter.setValidating(fValidating);
2153 fDTDImporter.setNamespacesEnabled(fNamespacesEnabled);
2154 if (fDTDImporter.scanDoctypeDecl(standalone) && fValidating) {
2155 // check declared elements
2156 if (fWarningOnUndeclaredElements) {
2157 // REVISIT: comment out because won't compile
2158 // checkDeclaredElements();
2159 }
2160
2161 // check required notations
2162 fEntityHandler.checkRequiredNotations();
2163 }
2164 fScanningDTD = false;
2165 /***/
2166 if (fDTDScanner == null) {
2167 fDTDScanner = new XMLDTDScanner(fStringPool, fErrorReporter, fEntityHandler, new ChunkyCharArray(fStringPool));
2168 fDTDScanner.setValidationEnabled(fValidationEnabled);
2169 fDTDScanner.setNamespacesEnabled(fNamespacesEnabled);
2170 }
2171 else {
2172 fDTDScanner.reset(fStringPool, new ChunkyCharArray(fStringPool));
2173 }
2174 fDTDScanner.setDTDHandler(fDTDHandler);
2175 fDTDScanner.setGrammarResolver(fGrammarResolver);
2176 // REVISIT: What about standalone?
2177 if (fDTDScanner.scanDoctypeDecl()) {
2178 if (fDTDScanner.getReadingExternalEntity()) {
2179 fDTDScanner.scanDecls(true);
2180 }
2181 // REVISIT: What about validation and checking stuff?
2182 }
2183 //VC_NOTATION_DECLARED
2184 if (fValidationEnabled) {
2185 ((DefaultEntityHandler)fEntityHandler).checkRequiredNotations();
2186 }
2187 /***/
2188 fScanningDTD = false;
2189
2190 } // scanDoctypeDecl(boolean)
2191
2192 /** Scan attribute value. */
2193 private int scanAttValue(QName element, QName attribute) throws Exception {
2194
2195 //fAttrNameLocator = getLocatorImpl(fAttrNameLocator);
2196 int attValue = scanAttValue(element, attribute, fValidationEnabled);
2197 if (attValue == -1) {
2198 return XMLDocumentScanner.RESULT_FAILURE;
2199 }
2200
2201
2202 /***
2203 // REVISIT: This is validation related.
2204 if (!fValidating && fAttDefCount == 0) {
2205 int attType = fCDATASymbol;
2206 if (fAttrListHandle == -1)
2207 fAttrListHandle = fAttrList.startAttrList();
2208 // REVISIT: Should this be localpart or rawname?
2209 if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2210 return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2211 }
2212 return XMLDocumentScanner.RESULT_SUCCESS;
2213 }
2214 /****/
2215
2216 /****
2217 // REVISIT: Validation. What should these be?
2218 int attDefIndex = getAttDef(element, attribute);
2219 if (attDefIndex == -1) {
2220
2221 if (fValidating) {
2222 // REVISIT - cache the elem/attr tuple so that we only give
2223 // this error once for each unique occurrence
2224 Object[] args = { fStringPool.toString(element.rawname),
2225 fStringPool.toString(attribute.rawname) };
2226 fErrorReporter.reportError(fAttrNameLocator,
2227 XMLMessages.XML_DOMAIN,
2228 XMLMessages.MSG_ATTRIBUTE_NOT_DECLARED,
2229 XMLMessages.VC_ATTRIBUTE_VALUE_TYPE,
2230 args,
2231 XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
2232 }
2233
2234 int attType = fCDATASymbol;
2235 if (fAttrListHandle == -1) {
2236 fAttrListHandle = fAttrList.startAttrList();
2237 }
2238 // REVISIT: Validation. What should the name be?
2239 if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2240 return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2241 }
2242 return XMLDocumentScanner.RESULT_SUCCESS;
2243 }
2244 /****/
2245
2246 /****
2247 int attType = getAttType(attDefIndex);
2248 if (attType != fCDATASymbol) {
2249 AttributeValidator av = getAttributeValidator(attDefIndex);
2250 int enumHandle = getEnumeration(attDefIndex);
2251 // REVISIT: Validation. What should these be?
2252 attValue = av.normalize(element, attribute,
2253 attValue, attType, enumHandle);
2254 }
2255
2256 if (fAttrListHandle == -1) {
2257 fAttrListHandle = fAttrList.startAttrList();
2258 }
2259 // REVISIT: Validation. What should the name be?
2260 if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2261 return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2262 }
2263 /***/
2264
2265 return XMLDocumentScanner.RESULT_SUCCESS;
2266
2267 } // scanAttValue(QName,QName):int
2268
2269 /** Returns true if the version number is valid. */
2270 private boolean validVersionNum(String version) {
2271 return XMLCharacterProperties.validVersionNum(version);
2272 }
2273
2274 /** Returns true if the encoding name is valid. */
2275 private boolean validEncName(String encoding) {
2276 return XMLCharacterProperties.validEncName(encoding);
2277 }
2278
2279} // class XMLDocumentScanner