1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl;
19
20 import java.io.CharConversionException;
21 import java.io.EOFException;
22 import java.io.IOException;
23
24 import org.apache.xerces.impl.dtd.XMLDTDDescription;
25 import org.apache.xerces.impl.io.MalformedByteSequenceException;
26 import org.apache.xerces.impl.msg.XMLMessageFormatter;
27 import org.apache.xerces.impl.validation.ValidationManager;
28 import org.apache.xerces.util.NamespaceSupport;
29 import org.apache.xerces.util.XMLChar;
30 import org.apache.xerces.util.XMLStringBuffer;
31 import org.apache.xerces.xni.Augmentations;
32 import org.apache.xerces.xni.NamespaceContext;
33 import org.apache.xerces.xni.XMLResourceIdentifier;
34 import org.apache.xerces.xni.XMLString;
35 import org.apache.xerces.xni.XNIException;
36 import org.apache.xerces.xni.parser.XMLComponentManager;
37 import org.apache.xerces.xni.parser.XMLConfigurationException;
38 import org.apache.xerces.xni.parser.XMLDTDScanner;
39 import org.apache.xerces.xni.parser.XMLInputSource;
40
41 /**
42 * This class is responsible for scanning XML document structure
43 * and content. The scanner acts as the source for the document
44 * information which is communicated to the document handler.
45 * <p>
46 * This component requires the following features and properties from the
47 * component manager that uses it:
48 * <ul>
49 * <li>http://xml.org/sax/features/namespaces</li>
50 * <li>http://xml.org/sax/features/validation</li>
51 * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
52 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
53 * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
54 * <li>http://apache.org/xml/properties/internal/symbol-table</li>
55 * <li>http://apache.org/xml/properties/internal/error-reporter</li>
56 * <li>http://apache.org/xml/properties/internal/entity-manager</li>
57 * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
58 * </ul>
59 *
60 * @xerces.internal
61 *
62 * @author Glenn Marcy, IBM
63 * @author Andy Clark, IBM
64 * @author Arnaud Le Hors, IBM
65 * @author Eric Ye, IBM
66 *
67 * @version $Id: XMLDocumentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
68 */
69 public class XMLDocumentScannerImpl
70 extends XMLDocumentFragmentScannerImpl {
71
72 //
73 // Constants
74 //
75
76 // scanner states
77
/** Scanner state: XML declaration. */
protected static final int SCANNER_STATE_XML_DECL = 0;

/** Scanner state: prolog. */
protected static final int SCANNER_STATE_PROLOG = 5;

/** Scanner state: trailing misc. */
protected static final int SCANNER_STATE_TRAILING_MISC = 12;

/** Scanner state: DTD internal declarations. */
protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 17;

/** Scanner state: open DTD external subset. */
protected static final int SCANNER_STATE_DTD_EXTERNAL = 18;

/** Scanner state: DTD external declarations. */
protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 19;

// feature identifiers

/** Feature identifier: load external DTD. */
protected static final String LOAD_EXTERNAL_DTD =
    Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

/**
 * Feature identifier: disallow DOCTYPE declaration. When the feature is
 * on, the prolog dispatcher reports a "DoctypeNotAllowed" fatal error as
 * soon as a DOCTYPE declaration is encountered.
 */
protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
    Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;

// property identifiers

/** Property identifier: DTD scanner. */
protected static final String DTD_SCANNER =
    Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;

/** Property identifier: validation manager. */
protected static final String VALIDATION_MANAGER =
    Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

/** Property identifier: namespace context. */
protected static final String NAMESPACE_CONTEXT =
    Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;
119
120
121
122 // recognized features and properties
123
/**
 * Recognized features. These are the identifiers this class adds on top
 * of whatever the superclass already recognizes.
 */
private static final String[] RECOGNIZED_FEATURES = {
    LOAD_EXTERNAL_DTD,
    DISALLOW_DOCTYPE_DECL_FEATURE,
};

/**
 * Feature defaults. Parallel to RECOGNIZED_FEATURES: the entry at index
 * i is the default reported for the feature at the same index.
 */
private static final Boolean[] FEATURE_DEFAULTS = {
    Boolean.TRUE,
    Boolean.FALSE,
};

/**
 * Recognized properties. These are the identifiers this class adds on
 * top of whatever the superclass already recognizes.
 */
private static final String[] RECOGNIZED_PROPERTIES = {
    DTD_SCANNER,
    VALIDATION_MANAGER,
    NAMESPACE_CONTEXT,
};

/**
 * Property defaults. Parallel to RECOGNIZED_PROPERTIES; no default
 * value is reported for any of them.
 */
private static final Object[] PROPERTY_DEFAULTS = {
    null,
    null,
    null,
};
149
150 //
151 // Data
152 //
153
154 // properties
155
/** DTD scanner. Obtained from the component manager in reset. */
protected XMLDTDScanner fDTDScanner;
/** Validation manager; may be null when the property is not available. */
protected ValidationManager fValidationManager;

// protected data

/** Scanning DTD. Cleared on every reset. */
protected boolean fScanningDTD;

// other info

/** Doctype name (the root element name given in the DOCTYPE declaration). */
protected String fDoctypeName;

/** Doctype declaration public identifier. */
protected String fDoctypePublicId;

/** Doctype declaration system identifier. */
protected String fDoctypeSystemId;

/**
 * Namespace support. Replaced by the namespace-context property when the
 * component manager supplies a non-null one; passed to the document
 * handler in startDocument.
 */
protected NamespaceContext fNamespaceContext = new NamespaceSupport();

// features

/** Load external DTD. */
protected boolean fLoadExternalDTD = true;

/** Disallow doctype declaration. */
protected boolean fDisallowDoctype = false;

// state

/** Seen doctype declaration. Used to reject a second DOCTYPE in the prolog. */
protected boolean fSeenDoctypeDecl;

// dispatchers

/** XML declaration dispatcher. Installed by reset as the initial dispatcher. */
protected final Dispatcher fXMLDeclDispatcher = new XMLDeclDispatcher();

/** Prolog dispatcher. */
protected final Dispatcher fPrologDispatcher = new PrologDispatcher();

/** DTD dispatcher. */
protected final Dispatcher fDTDDispatcher = new DTDDispatcher();

/** Trailing miscellaneous section dispatcher. */
protected final Dispatcher fTrailingMiscDispatcher = new TrailingMiscDispatcher();

// temporary variables

/**
 * Array of 3 strings. Used as the output buffer of scanExternalID;
 * index 0 is read back as the system id and index 1 as the public id.
 */
private final String[] fStrings = new String[3];

/** String. Scratch buffer handed to scanPIData. */
private final XMLString fString = new XMLString();

/** String buffer. Scratch buffer used to assemble a PI target name. */
private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/**
 * External subset source. Set when the external subset resolver supplies
 * a subset for a DOCTYPE without a system identifier; cleared once it has
 * been handed to the DTD scanner and on reset.
 */
private XMLInputSource fExternalSubsetSource = null;

/** A DTD Description. Reused for every external subset lookup. */
private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null);
223
224 //
225 // Constructors
226 //
227
228 /** Default constructor. */
229 public XMLDocumentScannerImpl() {} // <init>()
230
231 //
232 // XMLDocumentScanner methods
233 //
234
235 /**
236 * Sets the input source.
237 *
238 * @param inputSource The input source.
239 *
240 * @throws IOException Thrown on i/o error.
241 */
242 public void setInputSource(XMLInputSource inputSource) throws IOException {
243 fEntityManager.setEntityHandler(this);
244 fEntityManager.startDocumentEntity(inputSource);
245 //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
246 } // setInputSource(XMLInputSource)
247
248 //
249 // XMLComponent methods
250 //
251
252 /**
253 * Resets the component. The component can query the component manager
254 * about any features and properties that affect the operation of the
255 * component.
256 *
257 * @param componentManager The component manager.
258 *
259 * @throws SAXException Thrown by component on initialization error.
260 * For example, if a feature or property is
261 * required for the operation of the component, the
262 * component manager may throw a
263 * SAXNotRecognizedException or a
264 * SAXNotSupportedException.
265 */
266 public void reset(XMLComponentManager componentManager)
267 throws XMLConfigurationException {
268
269 super.reset(componentManager);
270
271 // other settings
272 fDoctypeName = null;
273 fDoctypePublicId = null;
274 fDoctypeSystemId = null;
275 fSeenDoctypeDecl = false;
276 fScanningDTD = false;
277 fExternalSubsetSource = null;
278
279 if (!fParserSettings) {
280 // parser settings have not been changed
281 fNamespaceContext.reset();
282 // setup dispatcher
283 setScannerState(SCANNER_STATE_XML_DECL);
284 setDispatcher(fXMLDeclDispatcher);
285 return;
286 }
287
288 // xerces features
289 try {
290 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD);
291 }
292 catch (XMLConfigurationException e) {
293 fLoadExternalDTD = true;
294 }
295 try {
296 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE);
297 }
298 catch (XMLConfigurationException e) {
299 fDisallowDoctype = false;
300 }
301
302 // xerces properties
303 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER);
304 try {
305 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER);
306 }
307 catch (XMLConfigurationException e) {
308 fValidationManager = null;
309 }
310
311 try {
312 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT);
313 }
314 catch (XMLConfigurationException e) { }
315 if (fNamespaceContext == null) {
316 fNamespaceContext = new NamespaceSupport();
317 }
318 fNamespaceContext.reset();
319
320 // setup dispatcher
321 setScannerState(SCANNER_STATE_XML_DECL);
322 setDispatcher(fXMLDeclDispatcher);
323
324 } // reset(XMLComponentManager)
325
326 /**
327 * Returns a list of feature identifiers that are recognized by
328 * this component. This method may return null if no features
329 * are recognized by this component.
330 */
331 public String[] getRecognizedFeatures() {
332 String[] featureIds = super.getRecognizedFeatures();
333 int length = featureIds != null ? featureIds.length : 0;
334 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length];
335 if (featureIds != null) {
336 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length);
337 }
338 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length);
339 return combinedFeatureIds;
340 } // getRecognizedFeatures():String[]
341
342 /**
343 * Sets the state of a feature. This method is called by the component
344 * manager any time after reset when a feature changes state.
345 * <p>
346 * <strong>Note:</strong> Components should silently ignore features
347 * that do not affect the operation of the component.
348 *
349 * @param featureId The feature identifier.
350 * @param state The state of the feature.
351 *
352 * @throws SAXNotRecognizedException The component should not throw
353 * this exception.
354 * @throws SAXNotSupportedException The component should not throw
355 * this exception.
356 */
357 public void setFeature(String featureId, boolean state)
358 throws XMLConfigurationException {
359
360 super.setFeature(featureId, state);
361
362 // Xerces properties
363 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
364 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
365
366 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() &&
367 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) {
368 fLoadExternalDTD = state;
369 return;
370 }
371 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() &&
372 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) {
373 fDisallowDoctype = state;
374 return;
375 }
376 }
377
378 } // setFeature(String,boolean)
379
380 /**
381 * Returns a list of property identifiers that are recognized by
382 * this component. This method may return null if no properties
383 * are recognized by this component.
384 */
385 public String[] getRecognizedProperties() {
386 String[] propertyIds = super.getRecognizedProperties();
387 int length = propertyIds != null ? propertyIds.length : 0;
388 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length];
389 if (propertyIds != null) {
390 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length);
391 }
392 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length);
393 return combinedPropertyIds;
394 } // getRecognizedProperties():String[]
395
396 /**
397 * Sets the value of a property. This method is called by the component
398 * manager any time after reset when a property changes value.
399 * <p>
400 * <strong>Note:</strong> Components should silently ignore properties
401 * that do not affect the operation of the component.
402 *
403 * @param propertyId The property identifier.
404 * @param value The value of the property.
405 *
406 * @throws SAXNotRecognizedException The component should not throw
407 * this exception.
408 * @throws SAXNotSupportedException The component should not throw
409 * this exception.
410 */
411 public void setProperty(String propertyId, Object value)
412 throws XMLConfigurationException {
413
414 super.setProperty(propertyId, value);
415
416 // Xerces properties
417 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
418 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
419
420 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() &&
421 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) {
422 fDTDScanner = (XMLDTDScanner)value;
423 }
424 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() &&
425 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) {
426 if (value != null) {
427 fNamespaceContext = (NamespaceContext)value;
428 }
429 }
430
431 return;
432 }
433
434 } // setProperty(String,Object)
435
436 /**
437 * Returns the default state for a feature, or null if this
438 * component does not want to report a default value for this
439 * feature.
440 *
441 * @param featureId The feature identifier.
442 *
443 * @since Xerces 2.2.0
444 */
445 public Boolean getFeatureDefault(String featureId) {
446
447 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
448 if (RECOGNIZED_FEATURES[i].equals(featureId)) {
449 return FEATURE_DEFAULTS[i];
450 }
451 }
452 return super.getFeatureDefault(featureId);
453 } // getFeatureDefault(String):Boolean
454
455 /**
456 * Returns the default state for a property, or null if this
457 * component does not want to report a default value for this
458 * property.
459 *
460 * @param propertyId The property identifier.
461 *
462 * @since Xerces 2.2.0
463 */
464 public Object getPropertyDefault(String propertyId) {
465 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
466 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
467 return PROPERTY_DEFAULTS[i];
468 }
469 }
470 return super.getPropertyDefault(propertyId);
471 } // getPropertyDefault(String):Object
472
473 //
474 // XMLEntityHandler methods
475 //
476
477 /**
478 * This method notifies of the start of an entity. The DTD has the
479 * pseudo-name of "[dtd]" parameter entity names start with '%'; and
480 * general entities are just specified by their name.
481 *
482 * @param name The name of the entity.
483 * @param identifier The resource identifier.
484 * @param encoding The auto-detected IANA encoding name of the entity
485 * stream. This value will be null in those situations
486 * where the entity encoding is not auto-detected (e.g.
487 * internal entities or a document entity that is
488 * parsed from a java.io.Reader).
489 *
490 * @throws XNIException Thrown by handler to signal an error.
491 */
492 public void startEntity(String name,
493 XMLResourceIdentifier identifier,
494 String encoding, Augmentations augs) throws XNIException {
495
496 super.startEntity(name, identifier, encoding, augs);
497
498 // prepare to look for a TextDecl if external general entity
499 if (!name.equals("[xml]") && fEntityScanner.isExternal()) {
500 setScannerState(SCANNER_STATE_TEXT_DECL);
501 }
502
503 // call handler
504 if (fDocumentHandler != null && name.equals("[xml]")) {
505 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
506 }
507
508 } // startEntity(String,identifier,String)
509
510 /**
511 * This method notifies the end of an entity. The DTD has the pseudo-name
512 * of "[dtd]" parameter entity names start with '%'; and general entities
513 * are just specified by their name.
514 *
515 * @param name The name of the entity.
516 *
517 * @throws XNIException Thrown by handler to signal an error.
518 */
519 public void endEntity(String name, Augmentations augs) throws XNIException {
520
521 super.endEntity(name, augs);
522
523 // call handler
524 if (fDocumentHandler != null && name.equals("[xml]")) {
525 fDocumentHandler.endDocument(null);
526 }
527
528 } // endEntity(String)
529
530 //
531 // Protected methods
532 //
533
534 // dispatcher factory methods
535
536 /** Creates a content dispatcher. */
537 protected Dispatcher createContentDispatcher() {
538 return new ContentDispatcher();
539 } // createContentDispatcher():Dispatcher
540
541 // scanning methods
542
543 /** Scans a doctype declaration. */
544 protected boolean scanDoctypeDecl() throws IOException, XNIException {
545
546 // spaces
547 if (!fEntityScanner.skipSpaces()) {
548 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL",
549 null);
550 }
551
552 // root element name
553 fDoctypeName = fEntityScanner.scanName();
554 if (fDoctypeName == null) {
555 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null);
556 }
557
558 // external id
559 if (fEntityScanner.skipSpaces()) {
560 scanExternalID(fStrings, false);
561 fDoctypeSystemId = fStrings[0];
562 fDoctypePublicId = fStrings[1];
563 fEntityScanner.skipSpaces();
564 }
565
566 fHasExternalDTD = fDoctypeSystemId != null;
567
568 // Attempt to locate an external subset with an external subset resolver.
569 if (!fHasExternalDTD && fExternalSubsetResolver != null) {
570 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
571 fDTDDescription.setRootName(fDoctypeName);
572 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
573 fHasExternalDTD = fExternalSubsetSource != null;
574 }
575
576 // call handler
577 if (fDocumentHandler != null) {
578 // NOTE: I don't like calling the doctypeDecl callback until
579 // end of the *full* doctype line (including internal
580 // subset) is parsed correctly but SAX2 requires that
581 // it knows the root element name and public and system
582 // identifier for the startDTD call. -Ac
583 if (fExternalSubsetSource == null) {
584 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
585 }
586 else {
587 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null);
588 }
589 }
590
591 // is there an internal subset?
592 boolean internalSubset = true;
593 if (!fEntityScanner.skipChar('[')) {
594 internalSubset = false;
595 fEntityScanner.skipSpaces();
596 if (!fEntityScanner.skipChar('>')) {
597 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
598 }
599 fMarkupDepth--;
600 }
601
602 return internalSubset;
603
604 } // scanDoctypeDecl():boolean
605
606 //
607 // Private methods
608 //
609
610 /** Returns the scanner state name. */
611 protected String getScannerStateName(int state) {
612
613 switch (state) {
614 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL";
615 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG";
616 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC";
617 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS";
618 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL";
619 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS";
620 }
621 return super.getScannerStateName(state);
622
623 } // getScannerStateName(int):String
624
625 //
626 // Classes
627 //
628
629 /**
630 * Dispatcher to handle XMLDecl scanning.
631 *
632 * @author Andy Clark, IBM
633 */
634 protected final class XMLDeclDispatcher
635 implements Dispatcher {
636
637 //
638 // Dispatcher methods
639 //
640
641 /**
642 * Dispatch an XML "event".
643 *
644 * @param complete True if this dispatcher is intended to scan
645 * and dispatch as much as possible.
646 *
647 * @return True if there is more to dispatch either from this
648 * or a another dispatcher.
649 *
650 * @throws IOException Thrown on i/o error.
651 * @throws XNIException Thrown on parse error.
652 */
653 public boolean dispatch(boolean complete)
654 throws IOException, XNIException {
655
656 // next dispatcher is prolog regardless of whether there
657 // is an XMLDecl in this document
658 setScannerState(SCANNER_STATE_PROLOG);
659 setDispatcher(fPrologDispatcher);
660
661 // scan XMLDecl
662 try {
663 if (fEntityScanner.skipString("<?xml")) {
664 fMarkupDepth++;
665 // NOTE: special case where document starts with a PI
666 // whose name starts with "xml" (e.g. "xmlfoo")
667 if (XMLChar.isName(fEntityScanner.peekChar())) {
668 fStringBuffer.clear();
669 fStringBuffer.append("xml");
670 if (fNamespaces) {
671 while (XMLChar.isNCName(fEntityScanner.peekChar())) {
672 fStringBuffer.append((char)fEntityScanner.scanChar());
673 }
674 }
675 else {
676 while (XMLChar.isName(fEntityScanner.peekChar())) {
677 fStringBuffer.append((char)fEntityScanner.scanChar());
678 }
679 }
680 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
681 scanPIData(target, fString);
682 }
683
684 // standard XML declaration
685 else {
686 scanXMLDeclOrTextDecl(false);
687 }
688 }
689 fEntityManager.fCurrentEntity.mayReadChunks = true;
690
691 // if no XMLDecl, then scan piece of prolog
692 return true;
693 }
694 // encoding errors
695 catch (MalformedByteSequenceException e) {
696 fErrorReporter.reportError(e.getDomain(), e.getKey(),
697 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
698 return false;
699 }
700 catch (CharConversionException e) {
701 fErrorReporter.reportError(
702 XMLMessageFormatter.XML_DOMAIN,
703 "CharConversionFailure",
704 null,
705 XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
706 return false;
707 }
708 // premature end of file
709 catch (EOFException e) {
710 reportFatalError("PrematureEOF", null);
711 return false;
712 //throw e;
713 }
714
715
716 } // dispatch(boolean):boolean
717
718 } // class XMLDeclDispatcher
719
/**
 * Dispatcher to handle prolog scanning.
 * <p>
 * Runs a small state machine over the scanner state: it consumes
 * comments, processing instructions and a DOCTYPE declaration, and
 * hands control to the content dispatcher once the start of the root
 * element is seen, or to the DTD dispatcher when a DTD subset has to
 * be scanned.
 *
 * @author Andy Clark, IBM
 */
protected final class PrologDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or a another dispatcher; false after an encoding error
     *         or a premature end of file has been reported.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {

        try {
            // "again" is set by a state that only decided what to do
            // next, so the follow-up state runs within the same call
            boolean again;
            do {
                again = false;
                switch (fScannerState) {
                    case SCANNER_STATE_PROLOG: {
                        // classify whatever follows optional white space
                        fEntityScanner.skipSpaces();
                        if (fEntityScanner.skipChar('<')) {
                            setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            again = true;
                        }
                        else if (fEntityScanner.skipChar('&')) {
                            setScannerState(SCANNER_STATE_REFERENCE);
                            again = true;
                        }
                        else {
                            setScannerState(SCANNER_STATE_CONTENT);
                            again = true;
                        }
                        break;
                    }
                    case SCANNER_STATE_START_OF_MARKUP: {
                        // a '<' has been consumed
                        fMarkupDepth++;
                        if (fEntityScanner.skipChar('!')) {
                            if (fEntityScanner.skipChar('-')) {
                                if (!fEntityScanner.skipChar('-')) {
                                    reportFatalError("InvalidCommentStart",
                                                     null);
                                }
                                setScannerState(SCANNER_STATE_COMMENT);
                                again = true;
                            }
                            else if (fEntityScanner.skipString("DOCTYPE")) {
                                setScannerState(SCANNER_STATE_DOCTYPE);
                                again = true;
                            }
                            else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                                 null);
                            }
                        }
                        else if (isValidNameStartChar(fEntityScanner.peekChar())) {
                            // start of the root element: content takes over
                            setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                            setDispatcher(fContentDispatcher);
                            return true;
                        }
                        else if (fEntityScanner.skipChar('?')) {
                            setScannerState(SCANNER_STATE_PI);
                            again = true;
                        }
                        else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                            // root element whose name starts with a surrogate pair
                            setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                            setDispatcher(fContentDispatcher);
                            return true;
                        }
                        else {
                            reportFatalError("MarkupNotRecognizedInProlog",
                                             null);
                        }
                        break;
                    }
                    case SCANNER_STATE_COMMENT: {
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_PI: {
                        scanPI();
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_DOCTYPE: {
                        if (fDisallowDoctype) {
                            reportFatalError("DoctypeNotAllowed", null);
                        }
                        // only one DOCTYPE declaration is accepted
                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // scanDoctypeDecl() sends XNI doctypeDecl event that
                        // in SAX is converted to startDTD() event.
                        // A true result means an internal subset follows.
                        if (scanDoctypeDecl()) {
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            setDispatcher(fDTDDispatcher);
                            return true;
                        }

                        // handle external subset
                        if (fDoctypeSystemId != null) {
                            fIsEntityDeclaredVC = !fStandalone;
                            if (((fValidation || fLoadExternalDTD)
                                && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                setDispatcher(fDTDDispatcher);
                                return true;
                            }
                        }
                        else if (fExternalSubsetSource != null) {
                            fIsEntityDeclaredVC = !fStandalone;
                            if (((fValidation || fLoadExternalDTD)
                                && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                // This handles the case of a DOCTYPE that had neither an internal subset
                                // nor an external identifier; the subset came from the external subset resolver.
                                fDTDScanner.setInputSource(fExternalSubsetSource);
                                fExternalSubsetSource = null;
                                setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                setDispatcher(fDTDDispatcher);
                                return true;
                            }
                        }

                        // Send endDTD() call if:
                        // a) systemId is null or if an external subset resolver could not locate an external subset.
                        // b) "load-external-dtd" and validation are false
                        // c) DTD grammar is cached

                        // in XNI this results in 3 events:  doctypeDecl, startDTD, endDTD
                        // in SAX this results in 2 events: startDTD, endDTD
                        fDTDScanner.setInputSource(null);
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        fEntityScanner.scanChar();
                        // NOTE(review): there is no break here, so if
                        // reportFatalError returns, control falls through
                        // and "ReferenceIllegalInProlog" is reported as
                        // well -- confirm whether this is intended.
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                    }
                }
            } while (complete || again);

            // NOTE(review): the loop above only ends normally when
            // "complete" is false, so this branch looks unreachable --
            // confirm before relying on it.
            if (complete) {
                if (fEntityScanner.scanChar() != '<') {
                    reportFatalError("RootElementRequired", null);
                }
                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                setDispatcher(fContentDispatcher);
            }
        }
        // encoding errors
        catch (MalformedByteSequenceException e) {
            fErrorReporter.reportError(e.getDomain(), e.getKey(),
                e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        }
        catch (CharConversionException e) {
            fErrorReporter.reportError(
                    XMLMessageFormatter.XML_DOMAIN,
                    "CharConversionFailure",
                    null,
                    XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        }
        // premature end of file
        catch (EOFException e) {
            reportFatalError("PrematureEOF", null);
            return false;
            //throw e;
        }

        return true;

    } // dispatch(boolean):boolean

} // class PrologDispatcher
913
/**
 * Dispatcher to handle the internal and external DTD subsets.
 * <p>
 * The actual scanning is delegated to the DTD scanner; this class
 * sequences the internal subset, the resolution of the external subset
 * and the external subset itself, and returns control to the prolog
 * dispatcher when the DTD is finished.
 *
 * @author Andy Clark, IBM
 */
protected final class DTDDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or a another dispatcher; false after an encoding error
     *         or a premature end of file has been reported.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error; also thrown when the
     *                      scanner is in a state this dispatcher does
     *                      not handle.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {
        // detach this scanner as entity handler while the DTD is scanned;
        // the finally clause below re-attaches it on every exit path
        // (presumably so the DTD scanner deals with entity boundaries
        // itself -- TODO confirm)
        fEntityManager.setEntityHandler(null);
        try {
            boolean again;
            do {
                again = false;
                switch (fScannerState) {
                    case SCANNER_STATE_DTD_INTERNAL_DECLS: {
                        // REVISIT: Should there be a feature for
                        //          the "complete" parameter?
                        boolean completeDTD = true;
                        // the external subset is read only when validating or
                        // loading external DTDs, and no cached DTD is in use
                        boolean readExternalSubset = (fValidation || fLoadExternalDTD) && (fValidationManager == null || !fValidationManager.isCachedDTD());
                        boolean moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && readExternalSubset);
                        if (!moreToScan) {
                            // end doctype declaration
                            if (!fEntityScanner.skipChar(']')) {
                                reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET",
                                                 null);
                            }
                            fEntityScanner.skipSpaces();
                            if (!fEntityScanner.skipChar('>')) {
                                reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
                            }
                            fMarkupDepth--;

                            // scan external subset next
                            if (fDoctypeSystemId != null) {
                                fIsEntityDeclaredVC = !fStandalone;
                                if (readExternalSubset) {
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                    break;
                                }
                            }
                            else if (fExternalSubsetSource != null) {
                                fIsEntityDeclaredVC = !fStandalone;
                                if (readExternalSubset) {
                                    // This handles the case of a DOCTYPE that only had an internal subset.
                                    fDTDScanner.setInputSource(fExternalSubsetSource);
                                    fExternalSubsetSource = null;
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                    break;
                                }
                            }
                            // This document only has an internal subset. If it contains parameter entity
                            // references and standalone="no" then [Entity Declared] is a validity constraint.
                            else {
                                fIsEntityDeclaredVC = fEntityManager.hasPEReferences() && !fStandalone;
                            }

                            // break out of this dispatcher.
                            setScannerState(SCANNER_STATE_PROLOG);
                            setDispatcher(fPrologDispatcher);
                            fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
                            return true;
                        }
                        break;
                    }
                    case SCANNER_STATE_DTD_EXTERNAL: {
                        // resolve the external subset named in the DOCTYPE
                        // and hand the resulting source to the DTD scanner
                        fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null);
                        fDTDDescription.setRootName(fDoctypeName);
                        XMLInputSource xmlInputSource =
                            fEntityManager.resolveEntity(fDTDDescription);
                        fDTDScanner.setInputSource(xmlInputSource);
                        setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                        again = true;
                        break;
                    }
                    case SCANNER_STATE_DTD_EXTERNAL_DECLS: {
                        // REVISIT: Should there be a feature for
                        //          the "complete" parameter?
                        boolean completeDTD = true;
                        boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD);
                        if (!moreToScan) {
                            // DTD finished: back to the prolog
                            setScannerState(SCANNER_STATE_PROLOG);
                            setDispatcher(fPrologDispatcher);
                            fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
                            return true;
                        }
                        break;
                    }
                    default: {
                        // this dispatcher was entered in a state it does not own
                        throw new XNIException("DTDDispatcher#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')');
                    }
                }
            } while (complete || again);
        }
        // encoding errors
        catch (MalformedByteSequenceException e) {
            fErrorReporter.reportError(e.getDomain(), e.getKey(),
                e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        }
        catch (CharConversionException e) {
            fErrorReporter.reportError(
                    XMLMessageFormatter.XML_DOMAIN,
                    "CharConversionFailure",
                    null,
                    XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        }
        // premature end of file
        catch (EOFException e) {
            reportFatalError("PrematureEOF", null);
            return false;
            //throw e;
        }

        // cleanup
        finally {
            fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
        }

        return true;

    } // dispatch(boolean):boolean

} // class DTDDispatcher
1056
1057 /**
1058 * Dispatcher to handle content scanning.
1059 *
1060 * @author Andy Clark, IBM
1061 * @author Eric Ye, IBM
1062 */
1063 protected class ContentDispatcher
1064 extends FragmentContentDispatcher {
1065
1066 //
1067 // Protected methods
1068 //
1069
1070 // hooks
1071
1072 // NOTE: These hook methods are added so that the full document
1073 // scanner can share the majority of code with this class.
1074
1075 /**
1076 * Scan for DOCTYPE hook. This method is a hook for subclasses
1077 * to add code to handle scanning for a the "DOCTYPE" string
1078 * after the string "<!" has been scanned.
1079 *
1080 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
1081 * was not scanned.
1082 */
1083 protected boolean scanForDoctypeHook()
1084 throws IOException, XNIException {
1085
1086 if (fEntityScanner.skipString("DOCTYPE")) {
1087 setScannerState(SCANNER_STATE_DOCTYPE);
1088 return true;
1089 }
1090 return false;
1091
1092 } // scanForDoctypeHook():boolean
1093
1094 /**
1095 * Element depth iz zero. This methos is a hook for subclasses
1096 * to add code to handle when the element depth hits zero. When
1097 * scanning a document fragment, an element depth of zero is
1098 * normal. However, when scanning a full XML document, the
1099 * scanner must handle the trailing miscellanous section of
1100 * the document after the end of the document's root element.
1101 *
1102 * @return True if the caller should stop and return true which
1103 * allows the scanner to switch to a new scanning
1104 * dispatcher. A return value of false indicates that
1105 * the content dispatcher should continue as normal.
1106 */
1107 protected boolean elementDepthIsZeroHook()
1108 throws IOException, XNIException {
1109
1110 setScannerState(SCANNER_STATE_TRAILING_MISC);
1111 setDispatcher(fTrailingMiscDispatcher);
1112 return true;
1113
1114 } // elementDepthIsZeroHook():boolean
1115
1116 /**
1117 * Scan for root element hook. This method is a hook for
1118 * subclasses to add code that handles scanning for the root
1119 * element. When scanning a document fragment, there is no
1120 * "root" element. However, when scanning a full XML document,
1121 * the scanner must handle the root element specially.
1122 *
1123 * @return True if the caller should stop and return true which
1124 * allows the scanner to switch to a new scanning
1125 * dispatcher. A return value of false indicates that
1126 * the content dispatcher should continue as normal.
1127 */
1128 protected boolean scanRootElementHook()
1129 throws IOException, XNIException {
1130
1131 if (fExternalSubsetResolver != null && !fSeenDoctypeDecl
1132 && !fDisallowDoctype && (fValidation || fLoadExternalDTD)) {
1133 scanStartElementName();
1134 resolveExternalSubsetAndRead();
1135 if (scanStartElementAfterName()) {
1136 setScannerState(SCANNER_STATE_TRAILING_MISC);
1137 setDispatcher(fTrailingMiscDispatcher);
1138 return true;
1139 }
1140 }
1141 else if (scanStartElement()) {
1142 setScannerState(SCANNER_STATE_TRAILING_MISC);
1143 setDispatcher(fTrailingMiscDispatcher);
1144 return true;
1145 }
1146 return false;
1147
1148 } // scanRootElementHook():boolean
1149
1150 /**
1151 * End of file hook. This method is a hook for subclasses to
1152 * add code that handles the end of file. The end of file in
1153 * a document fragment is OK if the markup depth is zero.
1154 * However, when scanning a full XML document, an end of file
1155 * is always premature.
1156 */
1157 protected void endOfFileHook(EOFException e)
1158 throws IOException, XNIException {
1159
1160 reportFatalError("PrematureEOF", null);
1161 // in case continue-after-fatal-error set, should not do this...
1162 //throw e;
1163
1164 } // endOfFileHook()
1165
1166 /**
1167 * <p>Attempt to locate an external subset for a document that does not otherwise
1168 * have one. If an external subset is located, then it is scanned.</p>
1169 */
1170 protected void resolveExternalSubsetAndRead()
1171 throws IOException, XNIException {
1172
1173 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
1174 fDTDDescription.setRootName(fElementQName.rawname);
1175 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
1176
1177 if (src != null) {
1178 fDoctypeName = fElementQName.rawname;
1179 fDoctypePublicId = src.getPublicId();
1180 fDoctypeSystemId = src.getSystemId();
1181 // call document handler
1182 if (fDocumentHandler != null) {
1183 // This inserts a doctypeDecl event into the stream though no
1184 // DOCTYPE existed in the instance document.
1185 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
1186 }
1187 try {
1188 if (fValidationManager == null || !fValidationManager.isCachedDTD()) {
1189 fDTDScanner.setInputSource(src);
1190 while (fDTDScanner.scanDTDExternalSubset(true));
1191 }
1192 else {
1193 // This sends startDTD and endDTD calls down the pipeline.
1194 fDTDScanner.setInputSource(null);
1195 }
1196 }
1197 finally {
1198 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
1199 }
1200 }
1201 } // resolveExternalSubsetAndRead()
1202
1203 } // class ContentDispatcher
1204
1205 /**
1206 * Dispatcher to handle trailing miscellaneous section scanning.
1207 *
1208 * @author Andy Clark, IBM
1209 * @author Eric Ye, IBM
1210 */
1211 protected final class TrailingMiscDispatcher
1212 implements Dispatcher {
1213
1214 //
1215 // Dispatcher methods
1216 //
1217
1218 /**
1219 * Dispatch an XML "event".
1220 *
1221 * @param complete True if this dispatcher is intended to scan
1222 * and dispatch as much as possible.
1223 *
1224 * @return True if there is more to dispatch either from this
1225 * or a another dispatcher.
1226 *
1227 * @throws IOException Thrown on i/o error.
1228 * @throws XNIException Thrown on parse error.
1229 */
1230 public boolean dispatch(boolean complete)
1231 throws IOException, XNIException {
1232
1233 try {
1234 boolean again;
1235 do {
1236 again = false;
1237 switch (fScannerState) {
1238 case SCANNER_STATE_TRAILING_MISC: {
1239 fEntityScanner.skipSpaces();
1240 if (fEntityScanner.skipChar('<')) {
1241 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1242 again = true;
1243 }
1244 else {
1245 setScannerState(SCANNER_STATE_CONTENT);
1246 again = true;
1247 }
1248 break;
1249 }
1250 case SCANNER_STATE_START_OF_MARKUP: {
1251 fMarkupDepth++;
1252 if (fEntityScanner.skipChar('?')) {
1253 setScannerState(SCANNER_STATE_PI);
1254 again = true;
1255 }
1256 else if (fEntityScanner.skipChar('!')) {
1257 setScannerState(SCANNER_STATE_COMMENT);
1258 again = true;
1259 }
1260 else if (fEntityScanner.skipChar('/')) {
1261 reportFatalError("MarkupNotRecognizedInMisc",
1262 null);
1263 again = true;
1264 }
1265 else if (isValidNameStartChar(fEntityScanner.peekChar())) {
1266 reportFatalError("MarkupNotRecognizedInMisc",
1267 null);
1268 scanStartElement();
1269 setScannerState(SCANNER_STATE_CONTENT);
1270 }
1271 else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
1272 reportFatalError("MarkupNotRecognizedInMisc",
1273 null);
1274 scanStartElement();
1275 setScannerState(SCANNER_STATE_CONTENT);
1276 }
1277 else {
1278 reportFatalError("MarkupNotRecognizedInMisc",
1279 null);
1280 }
1281 break;
1282 }
1283 case SCANNER_STATE_PI: {
1284 scanPI();
1285 setScannerState(SCANNER_STATE_TRAILING_MISC);
1286 break;
1287 }
1288 case SCANNER_STATE_COMMENT: {
1289 if (!fEntityScanner.skipString("--")) {
1290 reportFatalError("InvalidCommentStart", null);
1291 }
1292 scanComment();
1293 setScannerState(SCANNER_STATE_TRAILING_MISC);
1294 break;
1295 }
1296 case SCANNER_STATE_CONTENT: {
1297 int ch = fEntityScanner.peekChar();
1298 if (ch == -1) {
1299 setScannerState(SCANNER_STATE_TERMINATED);
1300 return false;
1301 }
1302 reportFatalError("ContentIllegalInTrailingMisc",
1303 null);
1304 fEntityScanner.scanChar();
1305 setScannerState(SCANNER_STATE_TRAILING_MISC);
1306 break;
1307 }
1308 case SCANNER_STATE_REFERENCE: {
1309 reportFatalError("ReferenceIllegalInTrailingMisc",
1310 null);
1311 setScannerState(SCANNER_STATE_TRAILING_MISC);
1312 break;
1313 }
1314 case SCANNER_STATE_TERMINATED: {
1315 return false;
1316 }
1317 }
1318 } while (complete || again);
1319 }
1320 // encoding errors
1321 catch (MalformedByteSequenceException e) {
1322 fErrorReporter.reportError(e.getDomain(), e.getKey(),
1323 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1324 return false;
1325 }
1326 catch (CharConversionException e) {
1327 fErrorReporter.reportError(
1328 XMLMessageFormatter.XML_DOMAIN,
1329 "CharConversionFailure",
1330 null,
1331 XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1332 return false;
1333 }
1334 catch (EOFException e) {
1335 // NOTE: This is the only place we're allowed to reach
1336 // the real end of the document stream. Unless the
1337 // end of file was reached prematurely.
1338 if (fMarkupDepth != 0) {
1339 reportFatalError("PrematureEOF", null);
1340 return false;
1341 //throw e;
1342 }
1343
1344 setScannerState(SCANNER_STATE_TERMINATED);
1345 return false;
1346 }
1347
1348 return true;
1349
1350 } // dispatch(boolean):boolean
1351
1352 } // class TrailingMiscDispatcher
1353
1354 } // class XMLDocumentScannerImpl