1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl;
19
20 import java.io.CharConversionException;
21 import java.io.EOFException;
22 import java.io.IOException;
23
24 import org.apache.xerces.impl.io.MalformedByteSequenceException;
25 import org.apache.xerces.impl.msg.XMLMessageFormatter;
26 import org.apache.xerces.util.AugmentationsImpl;
27 import org.apache.xerces.util.XMLAttributesImpl;
28 import org.apache.xerces.util.XMLChar;
29 import org.apache.xerces.util.XMLStringBuffer;
30 import org.apache.xerces.util.XMLSymbols;
31 import org.apache.xerces.xni.Augmentations;
32 import org.apache.xerces.xni.QName;
33 import org.apache.xerces.xni.XMLAttributes;
34 import org.apache.xerces.xni.XMLDocumentHandler;
35 import org.apache.xerces.xni.XMLResourceIdentifier;
36 import org.apache.xerces.xni.XMLString;
37 import org.apache.xerces.xni.XNIException;
38 import org.apache.xerces.xni.parser.XMLComponent;
39 import org.apache.xerces.xni.parser.XMLComponentManager;
40 import org.apache.xerces.xni.parser.XMLConfigurationException;
41 import org.apache.xerces.xni.parser.XMLDocumentScanner;
42 import org.apache.xerces.xni.parser.XMLInputSource;
43
44 /**
45 * This class is responsible for scanning the structure and content
46 * of document fragments. The scanner acts as the source for the
47 * document information which is communicated to the document handler.
48 * <p>
49 * This component requires the following features and properties from the
50 * component manager that uses it:
51 * <ul>
52 * <li>http://xml.org/sax/features/validation</li>
53 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
54 * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
55 * <li>http://apache.org/xml/properties/internal/symbol-table</li>
56 * <li>http://apache.org/xml/properties/internal/error-reporter</li>
57 * <li>http://apache.org/xml/properties/internal/entity-manager</li>
58 * </ul>
59 *
60 * @xerces.internal
61 *
62 * @author Glenn Marcy, IBM
63 * @author Andy Clark, IBM
64 * @author Arnaud Le Hors, IBM
65 * @author Eric Ye, IBM
66 *
67 * @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
68 */
69 public class XMLDocumentFragmentScannerImpl
70 extends XMLScanner
71 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
72
73 //
74 // Constants
75 //
76
77 // scanner states
78
// Values held in fScannerState and passed to setScannerState(int).
// The numbering is not contiguous: 5 and 9-12 are not defined here.
// NOTE(review): the gaps are presumably used by subclasses -- confirm.

/** Scanner state: start of markup. */
protected static final int SCANNER_STATE_START_OF_MARKUP = 1;

/** Scanner state: comment. */
protected static final int SCANNER_STATE_COMMENT = 2;

/** Scanner state: processing instruction. */
protected static final int SCANNER_STATE_PI = 3;

/** Scanner state: DOCTYPE. */
protected static final int SCANNER_STATE_DOCTYPE = 4;

/** Scanner state: root element. */
protected static final int SCANNER_STATE_ROOT_ELEMENT = 6;

/** Scanner state: content. This is the state installed by reset(). */
protected static final int SCANNER_STATE_CONTENT = 7;

/** Scanner state: reference. */
protected static final int SCANNER_STATE_REFERENCE = 8;

/** Scanner state: end of input. */
protected static final int SCANNER_STATE_END_OF_INPUT = 13;

/** Scanner state: terminated. */
protected static final int SCANNER_STATE_TERMINATED = 14;

/** Scanner state: CDATA section. */
protected static final int SCANNER_STATE_CDATA = 15;

/** Scanner state: Text declaration. */
protected static final int SCANNER_STATE_TEXT_DECL = 16;
111
112 // feature identifiers
113
/** Feature identifier: namespaces (SAX feature prefix + namespaces suffix). */
protected static final String NAMESPACES =
    Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;

/** Feature identifier: notify built-in references. */
protected static final String NOTIFY_BUILTIN_REFS =
    Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

// property identifiers

/**
 * Property identifier: entity resolver. The value of this property is
 * only retained by this class when it implements ExternalSubsetResolver
 * (see reset and setProperty).
 */
protected static final String ENTITY_RESOLVER =
    Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
127
128 // recognized features and properties
129
/**
 * Recognized features. Index-parallel with FEATURE_DEFAULTS: the entry at
 * position i there is the default for the identifier at position i here.
 * NOTE(review): VALIDATION and NOTIFY_CHAR_REFS are not declared in this
 * file; presumably inherited from XMLScanner -- confirm.
 */
private static final String[] RECOGNIZED_FEATURES = {
    NAMESPACES,
    VALIDATION,
    NOTIFY_BUILTIN_REFS,
    NOTIFY_CHAR_REFS,
};

/**
 * Feature defaults, one per entry of RECOGNIZED_FEATURES. A null entry
 * means no default is reported for that feature.
 */
private static final Boolean[] FEATURE_DEFAULTS = {
    null,
    null,
    Boolean.FALSE,
    Boolean.FALSE,
};

/**
 * Recognized properties. Index-parallel with PROPERTY_DEFAULTS.
 * NOTE(review): SYMBOL_TABLE, ERROR_REPORTER and ENTITY_MANAGER are not
 * declared in this file; presumably inherited from XMLScanner -- confirm.
 */
private static final String[] RECOGNIZED_PROPERTIES = {
    SYMBOL_TABLE,
    ERROR_REPORTER,
    ENTITY_MANAGER,
    ENTITY_RESOLVER,
};

/**
 * Property defaults, one per entry of RECOGNIZED_PROPERTIES. All entries
 * are null, i.e. no defaults are reported.
 */
private static final Object[] PROPERTY_DEFAULTS = {
    null,
    null,
    null,
    null,
};
161
162 // debugging
163
/** Debug scanner state. Compile-time switch; disabled. */
private static final boolean DEBUG_SCANNER_STATE = false;

/** Debug dispatcher. Compile-time switch; disabled. */
private static final boolean DEBUG_DISPATCHER = false;

/**
 * Debug content dispatcher scanning. When true, the start-element and
 * attribute scanning methods print entry/exit traces to System.out.
 */
protected static final boolean DEBUG_CONTENT_SCANNING = false;
172
173 //
174 // Data
175 //
176
177 // protected data
178
/**
 * Document handler. Receives the scanner's callbacks; may be null, in
 * which case the callbacks are skipped.
 */
protected XMLDocumentHandler fDocumentHandler;

/**
 * Entity stack. Slot d holds the value of fMarkupDepth recorded when the
 * entity at depth d was started (see startEntity); endEntity compares
 * against it to detect unbalanced markup. The array is doubled when full.
 */
protected int[] fEntityStack = new int[4];

/**
 * Markup depth. Decremented in this class once a declaration, processing
 * instruction, comment or empty element has been completely scanned.
 */
protected int fMarkupDepth;

/** Scanner state. Holds one of the SCANNER_STATE_* constants. */
protected int fScannerState;

/**
 * SubScanner state: inside scanContent method. Set while scanContent is
 * buffering ']' characters so that endEntity can flush the pending text.
 */
protected boolean fInScanContent = false;

/** has external dtd */
protected boolean fHasExternalDTD;

/**
 * Standalone. True when the scanned XML declaration specified
 * standalone="yes" (see scanXMLDeclOrTextDecl).
 */
protected boolean fStandalone;

/** True if [Entity Declared] is a VC; false if it is a WFC. */
protected boolean fIsEntityDeclaredVC;

/**
 * External subset resolver. Non-null only when the configured entity
 * resolver implements ExternalSubsetResolver.
 */
protected ExternalSubsetResolver fExternalSubsetResolver;
205
206 // element information
207
/**
 * Current element. Assigned from the result of fElementStack.pushElement
 * when a start tag is scanned; cleared to null by reset().
 */
protected QName fCurrentElement;

/** Element stack. */
protected final ElementStack fElementStack = new ElementStack();

// other info

/** Document system identifier.
 * REVISIT: So what's this used for? - NG
 * protected String fDocumentSystemId;
 ******/

// features

/**
 * Notify built-in references. Mirrors the NOTIFY_BUILTIN_REFS feature
 * (see reset and setFeature).
 */
protected boolean fNotifyBuiltInRefs = false;

// dispatchers

/** Active dispatcher. Driven by scanDocument. */
protected Dispatcher fDispatcher;

/**
 * Content dispatcher. Created through the overridable factory method
 * createContentDispatcher() while this object is being constructed.
 */
protected final Dispatcher fContentDispatcher = createContentDispatcher();
233
234 // temporary variables
235
/** Element QName. Overwritten each time a start tag name is scanned. */
protected final QName fElementQName = new QName();

/** Attribute QName. Overwritten each time an attribute name is scanned. */
protected final QName fAttributeQName = new QName();

/** Element attributes. Emptied at the start of every start-tag scan. */
protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl();

/** String. Scratch buffer (normalized attribute value, scanned content). */
protected final XMLString fTempString = new XMLString();

/** String. Scratch buffer (non-normalized attribute value). */
protected final XMLString fTempString2 = new XMLString();

/**
 * Array of 3 strings. Filled by the XML/text declaration scan with, in
 * order, the version, encoding and standalone pseudo-attribute values.
 */
private final String[] fStrings = new String[3];

/** String buffer. Used for comments and buffered character content. */
private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/** String buffer. */
private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

/** Another QName. */
private final QName fQName = new QName();

/** Single character array. */
private final char[] fSingleChar = new char[1];

/**
 * Saw spaces after element name or between attributes.
 *
 * This is reserved for the case where scanning of a start element spans
 * several methods, as is the case when scanning the start of a root element
 * where a DTD external subset may be read after scanning the element name.
 */
private boolean fSawSpace;

/** Reusable Augmentations. */
private Augmentations fTempAugmentations = null;
277
278 //
279 // Constructors
280 //
281
/**
 * Default constructor. The body is empty; all state comes from the field
 * initializers and is (re)established later by reset().
 */
public XMLDocumentFragmentScannerImpl() {} // <init>()
284
285 //
286 // XMLDocumentScanner methods
287 //
288
289 /**
290 * Sets the input source.
291 *
292 * @param inputSource The input source.
293 *
294 * @throws IOException Thrown on i/o error.
295 */
296 public void setInputSource(XMLInputSource inputSource) throws IOException {
297 fEntityManager.setEntityHandler(this);
298 fEntityManager.startEntity("$fragment$", inputSource, false, true);
299 //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
300 } // setInputSource(XMLInputSource)
301
302 /**
303 * Scans a document.
304 *
305 * @param complete True if the scanner should scan the document
306 * completely, pushing all events to the registered
307 * document handler. A value of false indicates that
308 * that the scanner should only scan the next portion
309 * of the document and return. A scanner instance is
310 * permitted to completely scan a document if it does
311 * not support this "pull" scanning model.
312 *
313 * @return True if there is more to scan, false otherwise.
314 */
315 public boolean scanDocument(boolean complete)
316 throws IOException, XNIException {
317
318 // reset entity scanner
319 fEntityScanner = fEntityManager.getEntityScanner();
320
321 // keep dispatching "events"
322 fEntityManager.setEntityHandler(this);
323 do {
324 if (!fDispatcher.dispatch(complete)) {
325 return false;
326 }
327 } while (complete);
328
329 // return success
330 return true;
331
332 } // scanDocument(boolean):boolean
333
334 //
335 // XMLComponent methods
336 //
337
338 /**
339 * Resets the component. The component can query the component manager
340 * about any features and properties that affect the operation of the
341 * component.
342 *
343 * @param componentManager The component manager.
344 *
345 * @throws SAXException Thrown by component on initialization error.
346 * For example, if a feature or property is
347 * required for the operation of the component, the
348 * component manager may throw a
349 * SAXNotRecognizedException or a
350 * SAXNotSupportedException.
351 */
352 public void reset(XMLComponentManager componentManager)
353 throws XMLConfigurationException {
354
355 super.reset(componentManager);
356
357 // other settings
358 //fDocumentSystemId = null;
359
360 // sax features
361 fAttributes.setNamespaces(fNamespaces);
362
363 // initialize vars
364 fMarkupDepth = 0;
365 fCurrentElement = null;
366 fElementStack.clear();
367 fHasExternalDTD = false;
368 fStandalone = false;
369 fIsEntityDeclaredVC = false;
370 fInScanContent = false;
371
372 // setup dispatcher
373 setScannerState(SCANNER_STATE_CONTENT);
374 setDispatcher(fContentDispatcher);
375
376
377 if (fParserSettings) {
378 // parser settings have changed. reset them.
379
380 // xerces features
381 try {
382 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS);
383 } catch (XMLConfigurationException e) {
384 fNotifyBuiltInRefs = false;
385 }
386
387 // xerces properties
388 try {
389 Object resolver = componentManager.getProperty(ENTITY_RESOLVER);
390 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ?
391 (ExternalSubsetResolver) resolver : null;
392 }
393 catch (XMLConfigurationException e) {
394 fExternalSubsetResolver = null;
395 }
396 }
397
398 } // reset(XMLComponentManager)
399
400 /**
401 * Returns a list of feature identifiers that are recognized by
402 * this component. This method may return null if no features
403 * are recognized by this component.
404 */
405 public String[] getRecognizedFeatures() {
406 return (String[])(RECOGNIZED_FEATURES.clone());
407 } // getRecognizedFeatures():String[]
408
409 /**
410 * Sets the state of a feature. This method is called by the component
411 * manager any time after reset when a feature changes state.
412 * <p>
413 * <strong>Note:</strong> Components should silently ignore features
414 * that do not affect the operation of the component.
415 *
416 * @param featureId The feature identifier.
417 * @param state The state of the feature.
418 *
419 * @throws SAXNotRecognizedException The component should not throw
420 * this exception.
421 * @throws SAXNotSupportedException The component should not throw
422 * this exception.
423 */
424 public void setFeature(String featureId, boolean state)
425 throws XMLConfigurationException {
426
427 super.setFeature(featureId, state);
428
429 // Xerces properties
430 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
431 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
432 if (suffixLength == Constants.NOTIFY_BUILTIN_REFS_FEATURE.length() &&
433 featureId.endsWith(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
434 fNotifyBuiltInRefs = state;
435 }
436 }
437
438 } // setFeature(String,boolean)
439
440 /**
441 * Returns a list of property identifiers that are recognized by
442 * this component. This method may return null if no properties
443 * are recognized by this component.
444 */
445 public String[] getRecognizedProperties() {
446 return (String[])(RECOGNIZED_PROPERTIES.clone());
447 } // getRecognizedProperties():String[]
448
449 /**
450 * Sets the value of a property. This method is called by the component
451 * manager any time after reset when a property changes value.
452 * <p>
453 * <strong>Note:</strong> Components should silently ignore properties
454 * that do not affect the operation of the component.
455 *
456 * @param propertyId The property identifier.
457 * @param value The value of the property.
458 *
459 * @throws SAXNotRecognizedException The component should not throw
460 * this exception.
461 * @throws SAXNotSupportedException The component should not throw
462 * this exception.
463 */
464 public void setProperty(String propertyId, Object value)
465 throws XMLConfigurationException {
466
467 super.setProperty(propertyId, value);
468
469 // Xerces properties
470 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
471 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
472 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
473 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
474 fEntityManager = (XMLEntityManager)value;
475 return;
476 }
477 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
478 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
479 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ?
480 (ExternalSubsetResolver) value : null;
481 return;
482 }
483 }
484
485 } // setProperty(String,Object)
486
487 /**
488 * Returns the default state for a feature, or null if this
489 * component does not want to report a default value for this
490 * feature.
491 *
492 * @param featureId The feature identifier.
493 *
494 * @since Xerces 2.2.0
495 */
496 public Boolean getFeatureDefault(String featureId) {
497 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
498 if (RECOGNIZED_FEATURES[i].equals(featureId)) {
499 return FEATURE_DEFAULTS[i];
500 }
501 }
502 return null;
503 } // getFeatureDefault(String):Boolean
504
505 /**
506 * Returns the default state for a property, or null if this
507 * component does not want to report a default value for this
508 * property.
509 *
510 * @param propertyId The property identifier.
511 *
512 * @since Xerces 2.2.0
513 */
514 public Object getPropertyDefault(String propertyId) {
515 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
516 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
517 return PROPERTY_DEFAULTS[i];
518 }
519 }
520 return null;
521 } // getPropertyDefault(String):Object
522
523 //
524 // XMLDocumentSource methods
525 //
526
527 /**
528 * setDocumentHandler
529 *
530 * @param documentHandler
531 */
532 public void setDocumentHandler(XMLDocumentHandler documentHandler) {
533 fDocumentHandler = documentHandler;
534 } // setDocumentHandler(XMLDocumentHandler)
535
536
537 /** Returns the document handler */
538 public XMLDocumentHandler getDocumentHandler(){
539 return fDocumentHandler;
540 }
541
542 //
543 // XMLEntityHandler methods
544 //
545
546 /**
547 * This method notifies of the start of an entity. The DTD has the
548 * pseudo-name of "[dtd]" parameter entity names start with '%'; and
549 * general entities are just specified by their name.
550 *
551 * @param name The name of the entity.
552 * @param identifier The resource identifier.
553 * @param encoding The auto-detected IANA encoding name of the entity
554 * stream. This value will be null in those situations
555 * where the entity encoding is not auto-detected (e.g.
556 * internal entities or a document entity that is
557 * parsed from a java.io.Reader).
558 * @param augs Additional information that may include infoset augmentations
559 *
560 * @throws XNIException Thrown by handler to signal an error.
561 */
562 public void startEntity(String name,
563 XMLResourceIdentifier identifier,
564 String encoding, Augmentations augs) throws XNIException {
565
566 // keep track of this entity before fEntityDepth is increased
567 if (fEntityDepth == fEntityStack.length) {
568 int[] entityarray = new int[fEntityStack.length * 2];
569 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length);
570 fEntityStack = entityarray;
571 }
572 fEntityStack[fEntityDepth] = fMarkupDepth;
573
574 super.startEntity(name, identifier, encoding, augs);
575
576 // WFC: entity declared in external subset in standalone doc
577 if(fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) {
578 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
579 new Object[]{name});
580 }
581
582 // call handler
583 if (fDocumentHandler != null && !fScanningAttribute) {
584 if (!name.equals("[xml]")) {
585 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
586 }
587 }
588
589 } // startEntity(String,XMLResourceIdentifier,String)
590
591 /**
592 * This method notifies the end of an entity. The DTD has the pseudo-name
593 * of "[dtd]" parameter entity names start with '%'; and general entities
594 * are just specified by their name.
595 *
596 * @param name The name of the entity.
597 * @param augs Additional information that may include infoset augmentations
598 *
599 * @throws XNIException Thrown by handler to signal an error.
600 */
601 public void endEntity(String name, Augmentations augs) throws XNIException {
602
603 // flush possible pending output buffer - see scanContent
604 if (fInScanContent && fStringBuffer.length != 0
605 && fDocumentHandler != null) {
606 fDocumentHandler.characters(fStringBuffer, null);
607 fStringBuffer.length = 0; // make sure we know it's been flushed
608 }
609
610 super.endEntity(name, augs);
611
612 // make sure markup is properly balanced
613 if (fMarkupDepth != fEntityStack[fEntityDepth]) {
614 reportFatalError("MarkupEntityMismatch", null);
615 }
616
617 // call handler
618 if (fDocumentHandler != null && !fScanningAttribute) {
619 if (!name.equals("[xml]")) {
620 fDocumentHandler.endGeneralEntity(name, augs);
621 }
622 }
623
624 } // endEntity(String)
625
626 //
627 // Protected methods
628 //
629
630 // dispatcher factory methods
631
632 /** Creates a content dispatcher. */
633 protected Dispatcher createContentDispatcher() {
634 return new FragmentContentDispatcher();
635 } // createContentDispatcher():Dispatcher
636
637 // scanning methods
638
639 /**
640 * Scans an XML or text declaration.
641 * <p>
642 * <pre>
643 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
644 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
645 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
646 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
647 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
648 * | ('"' ('yes' | 'no') '"'))
649 *
650 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
651 * </pre>
652 *
653 * @param scanningTextDecl True if a text declaration is to
654 * be scanned instead of an XML
655 * declaration.
656 */
657 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
658 throws IOException, XNIException {
659
660 // scan decl
661 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
662 fMarkupDepth--;
663
664 // pseudo-attribute values
665 String version = fStrings[0];
666 String encoding = fStrings[1];
667 String standalone = fStrings[2];
668
669 // set standalone
670 fStandalone = standalone != null && standalone.equals("yes");
671 fEntityManager.setStandalone(fStandalone);
672
673 // set version on reader
674 fEntityScanner.setXMLVersion(version);
675
676 // call handler
677 if (fDocumentHandler != null) {
678 if (scanningTextDecl) {
679 fDocumentHandler.textDecl(version, encoding, null);
680 }
681 else {
682 fDocumentHandler.xmlDecl(version, encoding, standalone, null);
683 }
684 }
685
686 // set encoding on reader
687 if (encoding != null && !fEntityScanner.fCurrentEntity.isEncodingExternallySpecified()) {
688 fEntityScanner.setEncoding(encoding);
689 }
690
691 } // scanXMLDeclOrTextDecl(boolean)
692
693 /**
694 * Scans a processing data. This is needed to handle the situation
695 * where a document starts with a processing instruction whose
696 * target name <em>starts with</em> "xml". (e.g. xmlfoo)
697 *
698 * @param target The PI target
699 * @param data The string to fill in with the data
700 */
701 protected void scanPIData(String target, XMLString data)
702 throws IOException, XNIException {
703
704 super.scanPIData(target, data);
705 fMarkupDepth--;
706
707 // call handler
708 if (fDocumentHandler != null) {
709 fDocumentHandler.processingInstruction(target, data, null);
710 }
711
712 } // scanPIData(String)
713
714 /**
715 * Scans a comment.
716 * <p>
717 * <pre>
718 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
719 * </pre>
720 * <p>
721 * <strong>Note:</strong> Called after scanning past '<!--'
722 */
723 protected void scanComment() throws IOException, XNIException {
724
725 scanComment(fStringBuffer);
726 fMarkupDepth--;
727
728 // call handler
729 if (fDocumentHandler != null) {
730 fDocumentHandler.comment(fStringBuffer, null);
731 }
732
733 } // scanComment()
734
735 /**
736 * Scans a start element. This method will handle the binding of
737 * namespace information and notifying the handler of the start
738 * of the element.
739 * <p>
740 * <pre>
741 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
742 * [40] STag ::= '<' Name (S Attribute)* S? '>'
743 * </pre>
744 * <p>
745 * <strong>Note:</strong> This method assumes that the leading
746 * '<' character has been consumed.
747 * <p>
748 * <strong>Note:</strong> This method uses the fElementQName and
749 * fAttributes variables. The contents of these variables will be
750 * destroyed. The caller should copy important information out of
751 * these variables before calling this method.
752 *
753 * @return True if element is empty. (i.e. It matches
754 * production [44].
755 */
756 protected boolean scanStartElement()
757 throws IOException, XNIException {
758 if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()");
759
760 // name
761 if (fNamespaces) {
762 fEntityScanner.scanQName(fElementQName);
763 }
764 else {
765 String name = fEntityScanner.scanName();
766 fElementQName.setValues(null, name, name, null);
767 }
768 String rawname = fElementQName.rawname;
769
770 // push element stack
771 fCurrentElement = fElementStack.pushElement(fElementQName);
772
773 // attributes
774 boolean empty = false;
775 fAttributes.removeAllAttributes();
776 do {
777 // spaces
778 boolean sawSpace = fEntityScanner.skipSpaces();
779
780 // end tag?
781 int c = fEntityScanner.peekChar();
782 if (c == '>') {
783 fEntityScanner.scanChar();
784 break;
785 }
786 else if (c == '/') {
787 fEntityScanner.scanChar();
788 if (!fEntityScanner.skipChar('>')) {
789 reportFatalError("ElementUnterminated",
790 new Object[]{rawname});
791 }
792 empty = true;
793 break;
794 }
795 else if (!isValidNameStartChar(c) || !sawSpace) {
796 // Second chance. Check if this character is a high
797 // surrogate of a valid name start character.
798 if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
799 reportFatalError("ElementUnterminated",
800 new Object[] { rawname });
801 }
802 }
803
804 // attributes
805 scanAttribute(fAttributes);
806
807 } while (true);
808
809 // call handler
810 if (fDocumentHandler != null) {
811 if (empty) {
812
813 //decrease the markup depth..
814 fMarkupDepth--;
815 // check that this element was opened in the same entity
816 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
817 reportFatalError("ElementEntityMismatch",
818 new Object[]{fCurrentElement.rawname});
819 }
820
821 fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
822
823 //pop the element off the stack..
824 fElementStack.popElement(fElementQName);
825 }
826 else {
827 fDocumentHandler.startElement(fElementQName, fAttributes, null);
828 }
829 }
830
831 if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty);
832 return empty;
833
834 } // scanStartElement():boolean
835
836 /**
837 * Scans the name of an element in a start or empty tag.
838 *
839 * @see #scanStartElement()
840 */
841 protected void scanStartElementName ()
842 throws IOException, XNIException {
843 // name
844 if (fNamespaces) {
845 fEntityScanner.scanQName(fElementQName);
846 }
847 else {
848 String name = fEntityScanner.scanName();
849 fElementQName.setValues(null, name, name, null);
850 }
851 // Must skip spaces here because the DTD scanner
852 // would consume them at the end of the external subset.
853 fSawSpace = fEntityScanner.skipSpaces();
854 } // scanStartElementName()
855
856 /**
857 * Scans the remainder of a start or empty tag after the element name.
858 *
859 * @see #scanStartElement
860 * @return True if element is empty.
861 */
862 protected boolean scanStartElementAfterName()
863 throws IOException, XNIException {
864 String rawname = fElementQName.rawname;
865
866 // push element stack
867 fCurrentElement = fElementStack.pushElement(fElementQName);
868
869 // attributes
870 boolean empty = false;
871 fAttributes.removeAllAttributes();
872 do {
873
874 // end tag?
875 int c = fEntityScanner.peekChar();
876 if (c == '>') {
877 fEntityScanner.scanChar();
878 break;
879 }
880 else if (c == '/') {
881 fEntityScanner.scanChar();
882 if (!fEntityScanner.skipChar('>')) {
883 reportFatalError("ElementUnterminated",
884 new Object[]{rawname});
885 }
886 empty = true;
887 break;
888 }
889 else if (!isValidNameStartChar(c) || !fSawSpace) {
890 // Second chance. Check if this character is a high
891 // surrogate of a valid name start character.
892 if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
893 reportFatalError("ElementUnterminated",
894 new Object[] { rawname });
895 }
896 }
897
898 // attributes
899 scanAttribute(fAttributes);
900
901 // spaces
902 fSawSpace = fEntityScanner.skipSpaces();
903
904 } while (true);
905
906 // call handler
907 if (fDocumentHandler != null) {
908 if (empty) {
909
910 //decrease the markup depth..
911 fMarkupDepth--;
912 // check that this element was opened in the same entity
913 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
914 reportFatalError("ElementEntityMismatch",
915 new Object[]{fCurrentElement.rawname});
916 }
917
918 fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
919
920 //pop the element off the stack..
921 fElementStack.popElement(fElementQName);
922 }
923 else {
924 fDocumentHandler.startElement(fElementQName, fAttributes, null);
925 }
926 }
927
928 if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty);
929 return empty;
930 } // scanStartElementAfterName()
931
932 /**
933 * Scans an attribute.
934 * <p>
935 * <pre>
936 * [41] Attribute ::= Name Eq AttValue
937 * </pre>
938 * <p>
939 * <strong>Note:</strong> This method assumes that the next
940 * character on the stream is the first character of the attribute
941 * name.
942 * <p>
943 * <strong>Note:</strong> This method uses the fAttributeQName and
944 * fQName variables. The contents of these variables will be
945 * destroyed.
946 *
947 * @param attributes The attributes list for the scanned attribute.
948 */
949 protected void scanAttribute(XMLAttributes attributes)
950 throws IOException, XNIException {
951 if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()");
952
953 // name
954 if (fNamespaces) {
955 fEntityScanner.scanQName(fAttributeQName);
956 }
957 else {
958 String name = fEntityScanner.scanName();
959 fAttributeQName.setValues(null, name, name, null);
960 }
961
962 // equals
963 fEntityScanner.skipSpaces();
964 if (!fEntityScanner.skipChar('=')) {
965 reportFatalError("EqRequiredInAttribute",
966 new Object[]{fCurrentElement.rawname,fAttributeQName.rawname});
967 }
968 fEntityScanner.skipSpaces();
969
970 // content
971 int oldLen = attributes.getLength();
972 int attrIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
973
974 // WFC: Unique Att Spec
975 if (oldLen == attributes.getLength()) {
976 reportFatalError("AttributeNotUnique",
977 new Object[]{fCurrentElement.rawname,
978 fAttributeQName.rawname});
979 }
980
981 // Scan attribute value and return true if the un-normalized and normalized value are the same
982 boolean isSameNormalizedAttr = scanAttributeValue(fTempString, fTempString2,
983 fAttributeQName.rawname, fIsEntityDeclaredVC, fCurrentElement.rawname);
984
985 attributes.setValue(attrIndex, fTempString.toString());
986 // If the non-normalized and normalized value are the same, avoid creating a new string.
987 if (!isSameNormalizedAttr) {
988 attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
989 }
990 attributes.setSpecified(attrIndex, true);
991
992 if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
993 } // scanAttribute(XMLAttributes)
994
995 /**
996 * Scans element content.
997 *
998 * @return Returns the next character on the stream.
999 */
1000 protected int scanContent() throws IOException, XNIException {
1001
1002 XMLString content = fTempString;
1003 int c = fEntityScanner.scanContent(content);
1004 if (c == '\r') {
1005 // happens when there is the character reference
1006 fEntityScanner.scanChar();
1007 fStringBuffer.clear();
1008 fStringBuffer.append(fTempString);
1009 fStringBuffer.append((char)c);
1010 content = fStringBuffer;
1011 c = -1;
1012 }
1013 if (fDocumentHandler != null && content.length > 0) {
1014 fDocumentHandler.characters(content, null);
1015 }
1016
1017 if (c == ']' && fTempString.length == 0) {
1018 fStringBuffer.clear();
1019 fStringBuffer.append((char)fEntityScanner.scanChar());
1020 // remember where we are in case we get an endEntity before we
1021 // could flush the buffer out - this happens when we're parsing an
1022 // entity which ends with a ]
1023 fInScanContent = true;
1024 //
1025 // We work on a single character basis to handle cases such as:
1026 // ']]]>' which we might otherwise miss.
1027 //
1028 if (fEntityScanner.skipChar(']')) {
1029 fStringBuffer.append(']');
1030 while (fEntityScanner.skipChar(']')) {
1031 fStringBuffer.append(']');
1032 }
1033 if (fEntityScanner.skipChar('>')) {
1034 reportFatalError("CDEndInContent", null);
1035 }
1036 }
1037 if (fDocumentHandler != null && fStringBuffer.length != 0) {
1038 fDocumentHandler.characters(fStringBuffer, null);
1039 }
1040 fInScanContent = false;
1041 c = -1;
1042 }
1043 return c;
1044
1045 } // scanContent():int
1046
1047
1048 /**
1049 * Scans a CDATA section.
1050 * <p>
1051 * <strong>Note:</strong> This method uses the fTempString and
1052 * fStringBuffer variables.
1053 *
1054 * @param complete True if the CDATA section is to be scanned
1055 * completely.
1056 *
1057 * @return True if CDATA is completely scanned.
1058 */
1059 protected boolean scanCDATASection(boolean complete)
1060 throws IOException, XNIException {
1061
1062 // call handler
1063 if (fDocumentHandler != null) {
1064 fDocumentHandler.startCDATA(null);
1065 }
1066
1067 while (true) {
1068 fStringBuffer.clear();
1069 if (!fEntityScanner.scanData("]]", fStringBuffer)) {
1070 if (fDocumentHandler != null && fStringBuffer.length > 0) {
1071 fDocumentHandler.characters(fStringBuffer, null);
1072 }
1073 int brackets = 0;
1074 while (fEntityScanner.skipChar(']')) {
1075 brackets++;
1076 }
1077 if (fDocumentHandler != null && brackets > 0) {
1078 fStringBuffer.clear();
1079 if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
1080 // Handle large sequences of ']'
1081 int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE;
1082 int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE;
1083 for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
1084 fStringBuffer.append(']');
1085 }
1086 for (int i = 0; i < chunks; i++) {
1087 fDocumentHandler.characters(fStringBuffer, null);
1088 }
1089 if (remainder != 0) {
1090 fStringBuffer.length = remainder;
1091 fDocumentHandler.characters(fStringBuffer, null);
1092 }
1093 }
1094 else {
1095 for (int i = 0; i < brackets; i++) {
1096 fStringBuffer.append(']');
1097 }
1098 fDocumentHandler.characters(fStringBuffer, null);
1099 }
1100 }
1101 if (fEntityScanner.skipChar('>')) {
1102 break;
1103 }
1104 if (fDocumentHandler != null) {
1105 fStringBuffer.clear();
1106 fStringBuffer.append("]]");
1107 fDocumentHandler.characters(fStringBuffer, null);
1108 }
1109 }
1110 else {
1111 if (fDocumentHandler != null) {
1112 fDocumentHandler.characters(fStringBuffer, null);
1113 }
1114 int c = fEntityScanner.peekChar();
1115 if (c != -1 && isInvalidLiteral(c)) {
1116 if (XMLChar.isHighSurrogate(c)) {
1117 fStringBuffer.clear();
1118 scanSurrogates(fStringBuffer);
1119 if (fDocumentHandler != null) {
1120 fDocumentHandler.characters(fStringBuffer, null);
1121 }
1122 }
1123 else {
1124 reportFatalError("InvalidCharInCDSect",
1125 new Object[]{Integer.toString(c,16)});
1126 fEntityScanner.scanChar();
1127 }
1128 }
1129 }
1130 }
1131 fMarkupDepth--;
1132
1133 // call handler
1134 if (fDocumentHandler != null) {
1135 fDocumentHandler.endCDATA(null);
1136 }
1137
1138 return true;
1139
1140 } // scanCDATASection(boolean):boolean
1141
1142 /**
1143 * Scans an end element.
1144 * <p>
1145 * <pre>
1146 * [42] ETag ::= '</' Name S? '>'
1147 * </pre>
1148 * <p>
1149 * <strong>Note:</strong> This method uses the fElementQName variable.
1150 * The contents of this variable will be destroyed. The caller should
1151 * copy the needed information out of this variable before calling
1152 * this method.
1153 *
1154 * @return The element depth.
1155 */
1156 protected int scanEndElement() throws IOException, XNIException {
1157 if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()");
1158
1159 fElementStack.popElement(fElementQName) ;
1160
1161 // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
1162 //In scanners most of the time is consumed on checks done for XML characters, we can
1163 // optimize on it and avoid the checks done for endElement,
1164 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
1165
1166 // this should work both for namespace processing true or false...
1167
1168 //REVISIT: if the string is not the same as expected.. we need to do better error handling..
1169 //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
1170 if (!fEntityScanner.skipString(fElementQName.rawname)) {
1171 reportFatalError("ETagRequired", new Object[]{fElementQName.rawname});
1172 }
1173
1174 // end
1175 fEntityScanner.skipSpaces();
1176 if (!fEntityScanner.skipChar('>')) {
1177 reportFatalError("ETagUnterminated",
1178 new Object[]{fElementQName.rawname});
1179 }
1180 fMarkupDepth--;
1181
1182 //we have increased the depth for two markup "<" characters
1183 fMarkupDepth--;
1184
1185 // check that this element was opened in the same entity
1186 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1187 reportFatalError("ElementEntityMismatch",
1188 new Object[]{fCurrentElement.rawname});
1189 }
1190
1191 // call handler
1192 if (fDocumentHandler != null ) {
1193 fDocumentHandler.endElement(fElementQName, null);
1194 }
1195
1196 return fMarkupDepth;
1197
1198 } // scanEndElement():int
1199
1200 /**
1201 * Scans a character reference.
1202 * <p>
1203 * <pre>
1204 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1205 * </pre>
1206 */
1207 protected void scanCharReference()
1208 throws IOException, XNIException {
1209
1210 fStringBuffer2.clear();
1211 int ch = scanCharReferenceValue(fStringBuffer2, null);
1212 fMarkupDepth--;
1213 if (ch != -1) {
1214 // call handler
1215 if (fDocumentHandler != null) {
1216 if (fNotifyCharRefs) {
1217 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
1218 }
1219 Augmentations augs = null;
1220 if (fValidation && ch <= 0x20) {
1221 if (fTempAugmentations != null) {
1222 fTempAugmentations.removeAllItems();
1223 }
1224 else {
1225 fTempAugmentations = new AugmentationsImpl();
1226 }
1227 augs = fTempAugmentations;
1228 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
1229 }
1230 fDocumentHandler.characters(fStringBuffer2, augs);
1231 if (fNotifyCharRefs) {
1232 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
1233 }
1234 }
1235 }
1236
1237 } // scanCharReference()
1238
1239 /**
1240 * Scans an entity reference.
1241 *
1242 * @throws IOException Thrown if i/o error occurs.
1243 * @throws XNIException Thrown if handler throws exception upon
1244 * notification.
1245 */
1246 protected void scanEntityReference() throws IOException, XNIException {
1247
1248 // name
1249 String name = fEntityScanner.scanName();
1250 if (name == null) {
1251 reportFatalError("NameRequiredInReference", null);
1252 return;
1253 }
1254
1255 // end
1256 if (!fEntityScanner.skipChar(';')) {
1257 reportFatalError("SemicolonRequiredInReference", new Object []{name});
1258 }
1259 fMarkupDepth--;
1260
1261 // handle built-in entities
1262 if (name == fAmpSymbol) {
1263 handleCharacter('&', fAmpSymbol);
1264 }
1265 else if (name == fLtSymbol) {
1266 handleCharacter('<', fLtSymbol);
1267 }
1268 else if (name == fGtSymbol) {
1269 handleCharacter('>', fGtSymbol);
1270 }
1271 else if (name == fQuotSymbol) {
1272 handleCharacter('"', fQuotSymbol);
1273 }
1274 else if (name == fAposSymbol) {
1275 handleCharacter('\'', fAposSymbol);
1276 }
1277 // start general entity
1278 else if (fEntityManager.isUnparsedEntity(name)) {
1279 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
1280 }
1281 else {
1282 if (!fEntityManager.isDeclaredEntity(name)) {
1283 if (fIsEntityDeclaredVC) {
1284 if (fValidation)
1285 fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
1286 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
1287 }
1288 else {
1289 reportFatalError("EntityNotDeclared", new Object[]{name});
1290 }
1291 }
1292 fEntityManager.startEntity(name, false);
1293 }
1294
1295 } // scanEntityReference()
1296
1297 // utility methods
1298
1299 /**
1300 * Calls document handler with a single character resulting from
1301 * built-in entity resolution.
1302 *
1303 * @param c
1304 * @param entity built-in name
1305 */
1306 private void handleCharacter(char c, String entity) throws XNIException {
1307 if (fDocumentHandler != null) {
1308 if (fNotifyBuiltInRefs) {
1309 fDocumentHandler.startGeneralEntity(entity, null, null, null);
1310 }
1311
1312 fSingleChar[0] = c;
1313 fTempString.setValues(fSingleChar, 0, 1);
1314 fDocumentHandler.characters(fTempString, null);
1315
1316 if (fNotifyBuiltInRefs) {
1317 fDocumentHandler.endGeneralEntity(entity, null);
1318 }
1319 }
1320 } // handleCharacter(char)
1321
1322 /**
1323 * Handles the end element. This method will make sure that
1324 * the end element name matches the current element and notify
1325 * the handler about the end of the element and the end of any
1326 * relevent prefix mappings.
1327 * <p>
1328 * <strong>Note:</strong> This method uses the fQName variable.
1329 * The contents of this variable will be destroyed.
1330 *
1331 * @param element The element.
1332 *
1333 * @return The element depth.
1334 *
1335 * @throws XNIException Thrown if the handler throws a SAX exception
1336 * upon notification.
1337 *
1338 */
1339 // REVISIT: need to remove this method. It's not called anymore, because
1340 // the handling is done when the end tag is scanned. - SG
1341 protected int handleEndElement(QName element, boolean isEmpty)
1342 throws XNIException {
1343
1344 fMarkupDepth--;
1345 // check that this element was opened in the same entity
1346 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1347 reportFatalError("ElementEntityMismatch",
1348 new Object[]{fCurrentElement.rawname});
1349 }
1350 // make sure the elements match
1351 QName startElement = fQName;
1352 fElementStack.popElement(startElement);
1353 if (element.rawname != startElement.rawname) {
1354 reportFatalError("ETagRequired",
1355 new Object[]{startElement.rawname});
1356 }
1357
1358 // bind namespaces
1359 if (fNamespaces) {
1360 element.uri = startElement.uri;
1361 }
1362
1363 // call handler
1364 if (fDocumentHandler != null && !isEmpty) {
1365 fDocumentHandler.endElement(element, null);
1366 }
1367
1368 return fMarkupDepth;
1369
1370 } // callEndElement(QName,boolean):int
1371
1372 // helper methods
1373
1374 /**
1375 * Sets the scanner state.
1376 *
1377 * @param state The new scanner state.
1378 */
1379 protected final void setScannerState(int state) {
1380
1381 fScannerState = state;
1382 if (DEBUG_SCANNER_STATE) {
1383 System.out.print("### setScannerState: ");
1384 System.out.print(getScannerStateName(state));
1385 System.out.println();
1386 }
1387
1388 } // setScannerState(int)
1389
1390 /**
1391 * Sets the dispatcher.
1392 *
1393 * @param dispatcher The new dispatcher.
1394 */
1395 protected final void setDispatcher(Dispatcher dispatcher) {
1396 fDispatcher = dispatcher;
1397 if (DEBUG_DISPATCHER) {
1398 System.out.print("%%% setDispatcher: ");
1399 System.out.print(getDispatcherName(dispatcher));
1400 System.out.println();
1401 }
1402 }
1403
1404 //
1405 // Private methods
1406 //
1407
1408 /** Returns the scanner state name. */
1409 protected String getScannerStateName(int state) {
1410
1411 switch (state) {
1412 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE";
1413 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT";
1414 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP";
1415 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT";
1416 case SCANNER_STATE_PI: return "SCANNER_STATE_PI";
1417 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT";
1418 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE";
1419 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT";
1420 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED";
1421 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA";
1422 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL";
1423 }
1424
1425 return "??? ("+state+')';
1426
1427 } // getScannerStateName(int):String
1428
1429 /** Returns the dispatcher name. */
1430 public String getDispatcherName(Dispatcher dispatcher) {
1431
1432 if (DEBUG_DISPATCHER) {
1433 if (dispatcher != null) {
1434 String name = dispatcher.getClass().getName();
1435 int index = name.lastIndexOf('.');
1436 if (index != -1) {
1437 name = name.substring(index + 1);
1438 index = name.lastIndexOf('$');
1439 if (index != -1) {
1440 name = name.substring(index + 1);
1441 }
1442 }
1443 return name;
1444 }
1445 }
1446 return "null";
1447
1448 } // getDispatcherName():String
1449
1450 //
1451 // Classes
1452 //
1453
1454 /**
1455 * Element stack. This stack operates without synchronization, error
1456 * checking, and it re-uses objects instead of throwing popped items
1457 * away.
1458 *
1459 * @author Andy Clark, IBM
1460 */
1461 protected static class ElementStack {
1462
1463 //
1464 // Data
1465 //
1466
1467 /** The stack data. */
1468 protected QName[] fElements;
1469
1470 /** The size of the stack. */
1471 protected int fSize;
1472
1473 //
1474 // Constructors
1475 //
1476
1477 /** Default constructor. */
1478 public ElementStack() {
1479 fElements = new QName[10];
1480 for (int i = 0; i < fElements.length; i++) {
1481 fElements[i] = new QName();
1482 }
1483 } // <init>()
1484
1485 //
1486 // Public methods
1487 //
1488
1489 /**
1490 * Pushes an element on the stack.
1491 * <p>
1492 * <strong>Note:</strong> The QName values are copied into the
1493 * stack. In other words, the caller does <em>not</em> orphan
1494 * the element to the stack. Also, the QName object returned
1495 * is <em>not</em> orphaned to the caller. It should be
1496 * considered read-only.
1497 *
1498 * @param element The element to push onto the stack.
1499 *
1500 * @return Returns the actual QName object that stores the
1501 */
1502 public QName pushElement(QName element) {
1503 if (fSize == fElements.length) {
1504 QName[] array = new QName[fElements.length * 2];
1505 System.arraycopy(fElements, 0, array, 0, fSize);
1506 fElements = array;
1507 for (int i = fSize; i < fElements.length; i++) {
1508 fElements[i] = new QName();
1509 }
1510 }
1511 fElements[fSize].setValues(element);
1512 return fElements[fSize++];
1513 } // pushElement(QName):QName
1514
1515 /**
1516 * Pops an element off of the stack by setting the values of
1517 * the specified QName.
1518 * <p>
1519 * <strong>Note:</strong> The object returned is <em>not</em>
1520 * orphaned to the caller. Therefore, the caller should consider
1521 * the object to be read-only.
1522 */
1523 public void popElement(QName element) {
1524 element.setValues(fElements[--fSize]);
1525 } // popElement(QName)
1526
1527 /** Clears the stack without throwing away existing QName objects. */
1528 public void clear() {
1529 fSize = 0;
1530 } // clear()
1531
1532 } // class ElementStack
1533
1534 /**
1535 * This interface defines an XML "event" dispatching model. Classes
1536 * that implement this interface are responsible for scanning parts
1537 * of the XML document and dispatching callbacks.
1538 *
1539 * @xerces.internal
1540 *
1541 * @author Glenn Marcy, IBM
1542 */
1543 protected interface Dispatcher {
1544
1545 //
1546 // Dispatcher methods
1547 //
1548
1549 /**
1550 * Dispatch an XML "event".
1551 *
1552 * @param complete True if this dispatcher is intended to scan
1553 * and dispatch as much as possible.
1554 *
1555 * @return True if there is more to dispatch either from this
1556 * or a another dispatcher.
1557 *
1558 * @throws IOException Thrown on i/o error.
1559 * @throws XNIException Thrown on parse error.
1560 */
1561 public boolean dispatch(boolean complete)
1562 throws IOException, XNIException;
1563
1564 } // interface Dispatcher
1565
1566 /**
1567 * Dispatcher to handle content scanning.
1568 *
1569 * @author Andy Clark, IBM
1570 * @author Eric Ye, IBM
1571 */
1572 protected class FragmentContentDispatcher
1573 implements Dispatcher {
1574
1575 //
1576 // Dispatcher methods
1577 //
1578
1579 /**
1580 * Dispatch an XML "event".
1581 *
1582 * @param complete True if this dispatcher is intended to scan
1583 * and dispatch as much as possible.
1584 *
1585 * @return True if there is more to dispatch either from this
1586 * or a another dispatcher.
1587 *
1588 * @throws IOException Thrown on i/o error.
1589 * @throws XNIException Thrown on parse error.
1590 */
1591 public boolean dispatch(boolean complete)
1592 throws IOException, XNIException {
1593 try {
1594 boolean again;
1595 do {
1596 again = false;
1597 switch (fScannerState) {
1598 case SCANNER_STATE_CONTENT: {
1599 if (fEntityScanner.skipChar('<')) {
1600 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1601 again = true;
1602 }
1603 else if (fEntityScanner.skipChar('&')) {
1604 setScannerState(SCANNER_STATE_REFERENCE);
1605 again = true;
1606 }
1607 else {
1608 do {
1609 int c = scanContent();
1610 if (c == '<') {
1611 fEntityScanner.scanChar();
1612 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1613 break;
1614 }
1615 else if (c == '&') {
1616 fEntityScanner.scanChar();
1617 setScannerState(SCANNER_STATE_REFERENCE);
1618 break;
1619 }
1620 else if (c != -1 && isInvalidLiteral(c)) {
1621 if (XMLChar.isHighSurrogate(c)) {
1622 // special case: surrogates
1623 fStringBuffer.clear();
1624 if (scanSurrogates(fStringBuffer)) {
1625 // call handler
1626 if (fDocumentHandler != null) {
1627 fDocumentHandler.characters(fStringBuffer, null);
1628 }
1629 }
1630 }
1631 else {
1632 reportFatalError("InvalidCharInContent",
1633 new Object[] {
1634 Integer.toString(c, 16)});
1635 fEntityScanner.scanChar();
1636 }
1637 }
1638 } while (complete);
1639 }
1640 break;
1641 }
1642 case SCANNER_STATE_START_OF_MARKUP: {
1643 fMarkupDepth++;
1644 if (fEntityScanner.skipChar('/')) {
1645 if (scanEndElement() == 0) {
1646 if (elementDepthIsZeroHook()) {
1647 return true;
1648 }
1649 }
1650 setScannerState(SCANNER_STATE_CONTENT);
1651 }
1652 else if (isValidNameStartChar(fEntityScanner.peekChar())) {
1653 scanStartElement();
1654 setScannerState(SCANNER_STATE_CONTENT);
1655 }
1656 else if (fEntityScanner.skipChar('!')) {
1657 if (fEntityScanner.skipChar('-')) {
1658 if (!fEntityScanner.skipChar('-')) {
1659 reportFatalError("InvalidCommentStart",
1660 null);
1661 }
1662 setScannerState(SCANNER_STATE_COMMENT);
1663 again = true;
1664 }
1665 else if (fEntityScanner.skipString("[CDATA[")) {
1666 setScannerState(SCANNER_STATE_CDATA);
1667 again = true;
1668 }
1669 else if (!scanForDoctypeHook()) {
1670 reportFatalError("MarkupNotRecognizedInContent",
1671 null);
1672 }
1673 }
1674 else if (fEntityScanner.skipChar('?')) {
1675 setScannerState(SCANNER_STATE_PI);
1676 again = true;
1677 }
1678 else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
1679 scanStartElement();
1680 setScannerState(SCANNER_STATE_CONTENT);
1681 }
1682 else {
1683 reportFatalError("MarkupNotRecognizedInContent",
1684 null);
1685 setScannerState(SCANNER_STATE_CONTENT);
1686 }
1687 break;
1688 }
1689 case SCANNER_STATE_COMMENT: {
1690 scanComment();
1691 setScannerState(SCANNER_STATE_CONTENT);
1692 break;
1693 }
1694 case SCANNER_STATE_PI: {
1695 scanPI();
1696 setScannerState(SCANNER_STATE_CONTENT);
1697 break;
1698 }
1699 case SCANNER_STATE_CDATA: {
1700 scanCDATASection(complete);
1701 setScannerState(SCANNER_STATE_CONTENT);
1702 break;
1703 }
1704 case SCANNER_STATE_REFERENCE: {
1705 fMarkupDepth++;
1706 // NOTE: We need to set the state beforehand
1707 // because the XMLEntityHandler#startEntity
1708 // callback could set the state to
1709 // SCANNER_STATE_TEXT_DECL and we don't want
1710 // to override that scanner state.
1711 setScannerState(SCANNER_STATE_CONTENT);
1712 if (fEntityScanner.skipChar('#')) {
1713 scanCharReference();
1714 }
1715 else {
1716 scanEntityReference();
1717 }
1718 break;
1719 }
1720 case SCANNER_STATE_TEXT_DECL: {
1721 // scan text decl
1722 if (fEntityScanner.skipString("<?xml")) {
1723 fMarkupDepth++;
1724 // NOTE: special case where entity starts with a PI
1725 // whose name starts with "xml" (e.g. "xmlfoo")
1726 if (isValidNameChar(fEntityScanner.peekChar())) {
1727 fStringBuffer.clear();
1728 fStringBuffer.append("xml");
1729 if (fNamespaces) {
1730 while (isValidNCName(fEntityScanner.peekChar())) {
1731 fStringBuffer.append((char)fEntityScanner.scanChar());
1732 }
1733 }
1734 else {
1735 while (isValidNameChar(fEntityScanner.peekChar())) {
1736 fStringBuffer.append((char)fEntityScanner.scanChar());
1737 }
1738 }
1739 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
1740 scanPIData(target, fTempString);
1741 }
1742
1743 // standard text declaration
1744 else {
1745 scanXMLDeclOrTextDecl(true);
1746 }
1747 }
1748 // now that we've straightened out the readers, we can read in chunks:
1749 fEntityManager.fCurrentEntity.mayReadChunks = true;
1750 setScannerState(SCANNER_STATE_CONTENT);
1751 break;
1752 }
1753 case SCANNER_STATE_ROOT_ELEMENT: {
1754 if (scanRootElementHook()) {
1755 return true;
1756 }
1757 setScannerState(SCANNER_STATE_CONTENT);
1758 break;
1759 }
1760 case SCANNER_STATE_DOCTYPE: {
1761 reportFatalError("DoctypeIllegalInContent",
1762 null);
1763 setScannerState(SCANNER_STATE_CONTENT);
1764 }
1765 }
1766 } while (complete || again);
1767 }
1768 // encoding errors
1769 catch (MalformedByteSequenceException e) {
1770 fErrorReporter.reportError(e.getDomain(), e.getKey(),
1771 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1772 return false;
1773 }
1774 catch (CharConversionException e) {
1775 fErrorReporter.reportError(
1776 XMLMessageFormatter.XML_DOMAIN,
1777 "CharConversionFailure",
1778 null,
1779 XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1780 return false;
1781 }
1782 // premature end of file
1783 catch (EOFException e) {
1784 endOfFileHook(e);
1785 return false;
1786 }
1787
1788 return true;
1789
1790 } // dispatch(boolean):boolean
1791
1792 //
1793 // Protected methods
1794 //
1795
1796 // hooks
1797
1798 // NOTE: These hook methods are added so that the full document
1799 // scanner can share the majority of code with this class.
1800
1801 /**
1802 * Scan for DOCTYPE hook. This method is a hook for subclasses
1803 * to add code to handle scanning for a the "DOCTYPE" string
1804 * after the string "<!" has been scanned.
1805 *
1806 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
1807 * was not scanned.
1808 */
1809 protected boolean scanForDoctypeHook()
1810 throws IOException, XNIException {
1811 return false;
1812 } // scanForDoctypeHook():boolean
1813
1814 /**
1815 * Element depth iz zero. This methos is a hook for subclasses
1816 * to add code to handle when the element depth hits zero. When
1817 * scanning a document fragment, an element depth of zero is
1818 * normal. However, when scanning a full XML document, the
1819 * scanner must handle the trailing miscellanous section of
1820 * the document after the end of the document's root element.
1821 *
1822 * @return True if the caller should stop and return true which
1823 * allows the scanner to switch to a new scanning
1824 * dispatcher. A return value of false indicates that
1825 * the content dispatcher should continue as normal.
1826 */
1827 protected boolean elementDepthIsZeroHook()
1828 throws IOException, XNIException {
1829 return false;
1830 } // elementDepthIsZeroHook():boolean
1831
1832 /**
1833 * Scan for root element hook. This method is a hook for
1834 * subclasses to add code that handles scanning for the root
1835 * element. When scanning a document fragment, there is no
1836 * "root" element. However, when scanning a full XML document,
1837 * the scanner must handle the root element specially.
1838 *
1839 * @return True if the caller should stop and return true which
1840 * allows the scanner to switch to a new scanning
1841 * dispatcher. A return value of false indicates that
1842 * the content dispatcher should continue as normal.
1843 */
1844 protected boolean scanRootElementHook()
1845 throws IOException, XNIException {
1846 return false;
1847 } // scanRootElementHook():boolean
1848
1849 /**
1850 * End of file hook. This method is a hook for subclasses to
1851 * add code that handles the end of file. The end of file in
1852 * a document fragment is OK if the markup depth is zero.
1853 * However, when scanning a full XML document, an end of file
1854 * is always premature.
1855 */
1856 protected void endOfFileHook(EOFException e)
1857 throws IOException, XNIException {
1858
1859 // NOTE: An end of file is only only an error if we were
1860 // in the middle of scanning some markup. -Ac
1861 if (fMarkupDepth != 0) {
1862 reportFatalError("PrematureEOF", null);
1863 }
1864
1865 } // endOfFileHook()
1866
1867 } // class FragmentContentDispatcher
1868
1869 } // class XMLDocumentFragmentScannerImpl