1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl;
19
20 import java.io.IOException;
21
22 import org.apache.xerces.impl.msg.XMLMessageFormatter;
23 import org.apache.xerces.util.SymbolTable;
24 import org.apache.xerces.util.XMLChar;
25 import org.apache.xerces.util.XMLResourceIdentifierImpl;
26 import org.apache.xerces.util.XMLStringBuffer;
27 import org.apache.xerces.xni.Augmentations;
28 import org.apache.xerces.xni.XMLResourceIdentifier;
29 import org.apache.xerces.xni.XMLString;
30 import org.apache.xerces.xni.XNIException;
31 import org.apache.xerces.xni.parser.XMLComponent;
32 import org.apache.xerces.xni.parser.XMLComponentManager;
33 import org.apache.xerces.xni.parser.XMLConfigurationException;
34
35 /**
36 * This class is responsible for holding scanning methods common to
37 * scanning the XML document structure and content as well as the DTD
38 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
39 * from this base class.
40 *
41 * <p>
42 * This component requires the following features and properties from the
43 * component manager that uses it:
44 * <ul>
45 * <li>http://xml.org/sax/features/validation</li>
46 * <li>http://xml.org/sax/features/namespaces</li>
47 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
48 * <li>http://apache.org/xml/properties/internal/symbol-table</li>
49 * <li>http://apache.org/xml/properties/internal/error-reporter</li>
50 * <li>http://apache.org/xml/properties/internal/entity-manager</li>
51 * </ul>
52 *
53 * @xerces.internal
54 *
55 * @author Andy Clark, IBM
56 * @author Arnaud Le Hors, IBM
57 * @author Eric Ye, IBM
58 *
59 * @version $Id: XMLScanner.java 572055 2007-09-02 17:55:43Z mrglavas $
60 */
61 public abstract class XMLScanner
62 implements XMLComponent {
63
64 //
65 // Constants
66 //
67
68 // feature identifiers
69
70 /** Feature identifier: validation. */
71 protected static final String VALIDATION =
72 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
73
74 /** Feature identifier: namespaces. */
75 protected static final String NAMESPACES =
76 Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
77
78 /** Feature identifier: notify character references. */
79 protected static final String NOTIFY_CHAR_REFS =
80 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
81
82 protected static final String PARSER_SETTINGS =
83 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
84
85 // property identifiers
86
87 /** Property identifier: symbol table. */
88 protected static final String SYMBOL_TABLE =
89 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
90
91 /** Property identifier: error reporter. */
92 protected static final String ERROR_REPORTER =
93 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
94
95 /** Property identifier: entity manager. */
96 protected static final String ENTITY_MANAGER =
97 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
98
99 // debugging
100
101 /** Debug attribute normalization. */
102 protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
103
104 //
105 // Data
106 //
107
108
109 // features
110
111 /**
112 * Validation. This feature identifier is:
113 * http://xml.org/sax/features/validation
114 */
115 protected boolean fValidation = false;
116
117 /** Namespaces. */
118 protected boolean fNamespaces;
119
120 /** Character references notification. */
121 protected boolean fNotifyCharRefs = false;
122
123 /** Internal parser-settings feature */
124 protected boolean fParserSettings = true;
125
126 // properties
127
128 /** Symbol table. */
129 protected SymbolTable fSymbolTable;
130
131 /** Error reporter. */
132 protected XMLErrorReporter fErrorReporter;
133
134 /** Entity manager. */
135 protected XMLEntityManager fEntityManager;
136
137 // protected data
138
139 /** Entity scanner. */
140 protected XMLEntityScanner fEntityScanner;
141
142 /** Entity depth. */
143 protected int fEntityDepth;
144
    /** Literal value of the last character reference scanned. */
146 protected String fCharRefLiteral = null;
147
148 /** Scanning attribute. */
149 protected boolean fScanningAttribute;
150
151 /** Report entity boundary. */
152 protected boolean fReportEntity;
153
154 // symbols
155
156 /** Symbol: "version". */
157 protected final static String fVersionSymbol = "version".intern();
158
159 /** Symbol: "encoding". */
160 protected final static String fEncodingSymbol = "encoding".intern();
161
162 /** Symbol: "standalone". */
163 protected final static String fStandaloneSymbol = "standalone".intern();
164
165 /** Symbol: "amp". */
166 protected final static String fAmpSymbol = "amp".intern();
167
168 /** Symbol: "lt". */
169 protected final static String fLtSymbol = "lt".intern();
170
171 /** Symbol: "gt". */
172 protected final static String fGtSymbol = "gt".intern();
173
174 /** Symbol: "quot". */
175 protected final static String fQuotSymbol = "quot".intern();
176
177 /** Symbol: "apos". */
178 protected final static String fAposSymbol = "apos".intern();
179
180 // temporary variables
181
    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.
187
188 /** String. */
189 private final XMLString fString = new XMLString();
190
191 /** String buffer. */
192 private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
193
194 /** String buffer. */
195 private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
196
197 /** String buffer. */
198 private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
199
200 // temporary location for Resource identification information.
201 protected final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
202
203 //
204 // XMLComponent methods
205 //
206
207 /**
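     * Resets this scanner from the given component manager: unless the
     * parser settings are reported as unchanged, the symbol table, error
     * reporter, entity manager and the validation, namespaces and
     * notify-char-refs features are re-read; init() is then called.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Thrown if required features and
     *                                   properties cannot be found.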
214 */
215 public void reset(XMLComponentManager componentManager)
216 throws XMLConfigurationException {
217
218 try {
219 fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
220 } catch (XMLConfigurationException e) {
221 fParserSettings = true;
222 }
223
224 if (!fParserSettings) {
225 // parser settings have not been changed
226 init();
227 return;
228 }
229
230 // Xerces properties
231 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
232 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
233 fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
234
235 // sax features
236 try {
237 fValidation = componentManager.getFeature(VALIDATION);
238 }
239 catch (XMLConfigurationException e) {
240 fValidation = false;
241 }
242 try {
243 fNamespaces = componentManager.getFeature(NAMESPACES);
244 }
245 catch (XMLConfigurationException e) {
246 fNamespaces = true;
247 }
248 try {
249 fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
250 }
251 catch (XMLConfigurationException e) {
252 fNotifyCharRefs = false;
253 }
254
255 init();
256
257 } // reset(XMLComponentManager)
258
259 /**
260 * Sets the value of a property during parsing.
261 *
     * @param propertyId The identifier of the property to set.
     * @param value      The new value of the property.
264 */
265 public void setProperty(String propertyId, Object value)
266 throws XMLConfigurationException {
267
268 // Xerces properties
269 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
270 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
271
272 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
273 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
274 fSymbolTable = (SymbolTable)value;
275 }
276 else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
277 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
278 fErrorReporter = (XMLErrorReporter)value;
279 }
280 else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
281 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
282 fEntityManager = (XMLEntityManager)value;
283 }
284 }
285
286 } // setProperty(String,Object)
287
288 /*
289 * Sets the feature of the scanner.
290 */
291 public void setFeature(String featureId, boolean value)
292 throws XMLConfigurationException {
293
294 if (VALIDATION.equals(featureId)) {
295 fValidation = value;
296 } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
297 fNotifyCharRefs = value;
298 }
299 }
300
301 /*
302 * Gets the state of the feature of the scanner.
303 */
304 public boolean getFeature(String featureId)
305 throws XMLConfigurationException {
306
307 if (VALIDATION.equals(featureId)) {
308 return fValidation;
309 } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
310 return fNotifyCharRefs;
311 }
312 throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
313 }
314
315 //
316 // Protected methods
317 //
318
    // anybody calling this had better have set the SymbolTable!
320 protected void reset() {
321 init();
322
323 // DTD preparsing defaults:
324 fValidation = true;
325 fNotifyCharRefs = false;
326
327 }
328
329 // common scanning methods
330
331 /**
332 * Scans an XML or text declaration.
333 * <p>
334 * <pre>
335 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
336 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
337 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
338 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
339 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
340 * | ('"' ('yes' | 'no') '"'))
341 *
342 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
343 * </pre>
344 *
345 * @param scanningTextDecl True if a text declaration is to
346 * be scanned instead of an XML
347 * declaration.
348 * @param pseudoAttributeValues An array of size 3 to return the version,
349 * encoding and standalone pseudo attribute values
350 * (in that order).
351 *
352 * <strong>Note:</strong> This method uses fString, anything in it
353 * at the time of calling is lost.
354 */
355 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
356 String[] pseudoAttributeValues)
357 throws IOException, XNIException {
358
359 // pseudo-attribute values
360 String version = null;
361 String encoding = null;
362 String standalone = null;
363
364 // scan pseudo-attributes
365 final int STATE_VERSION = 0;
366 final int STATE_ENCODING = 1;
367 final int STATE_STANDALONE = 2;
368 final int STATE_DONE = 3;
369 int state = STATE_VERSION;
370
371 boolean dataFoundForTarget = false;
372 boolean sawSpace = fEntityScanner.skipDeclSpaces();
373 // since pseudoattributes are *not* attributes,
374 // their quotes don't need to be preserved in external parameter entities.
375 // the XMLEntityScanner#scanLiteral method will continue to
376 // emit -1 in such cases when it finds a quote; this is
377 // fine for other methods that parse scanned entities,
378 // but not for the scanning of pseudoattributes. So,
379 // temporarily, we must mark the current entity as not being "literal"
380 XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
381 boolean currLiteral = currEnt.literal;
382 currEnt.literal = false;
383 while (fEntityScanner.peekChar() != '?') {
384 dataFoundForTarget = true;
385 String name = scanPseudoAttribute(scanningTextDecl, fString);
386 switch (state) {
387 case STATE_VERSION: {
388 if (name == fVersionSymbol) {
389 if (!sawSpace) {
390 reportFatalError(scanningTextDecl
391 ? "SpaceRequiredBeforeVersionInTextDecl"
392 : "SpaceRequiredBeforeVersionInXMLDecl",
393 null);
394 }
395 version = fString.toString();
396 state = STATE_ENCODING;
397 if (!versionSupported(version)) {
398 reportFatalError(getVersionNotSupportedKey(),
399 new Object[]{version});
400 }
401 }
402 else if (name == fEncodingSymbol) {
403 if (!scanningTextDecl) {
404 reportFatalError("VersionInfoRequired", null);
405 }
406 if (!sawSpace) {
407 reportFatalError(scanningTextDecl
408 ? "SpaceRequiredBeforeEncodingInTextDecl"
409 : "SpaceRequiredBeforeEncodingInXMLDecl",
410 null);
411 }
412 encoding = fString.toString();
413 state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
414 }
415 else {
416 if (scanningTextDecl) {
417 reportFatalError("EncodingDeclRequired", null);
418 }
419 else {
420 reportFatalError("VersionInfoRequired", null);
421 }
422 }
423 break;
424 }
425 case STATE_ENCODING: {
426 if (name == fEncodingSymbol) {
427 if (!sawSpace) {
428 reportFatalError(scanningTextDecl
429 ? "SpaceRequiredBeforeEncodingInTextDecl"
430 : "SpaceRequiredBeforeEncodingInXMLDecl",
431 null);
432 }
433 encoding = fString.toString();
434 state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
435 // TODO: check encoding name; set encoding on
436 // entity scanner
437 }
438 else if (!scanningTextDecl && name == fStandaloneSymbol) {
439 if (!sawSpace) {
440 reportFatalError("SpaceRequiredBeforeStandalone",
441 null);
442 }
443 standalone = fString.toString();
444 state = STATE_DONE;
445 if (!standalone.equals("yes") && !standalone.equals("no")) {
446 reportFatalError("SDDeclInvalid", new Object[] {standalone});
447 }
448 }
449 else {
450 reportFatalError("EncodingDeclRequired", null);
451 }
452 break;
453 }
454 case STATE_STANDALONE: {
455 if (name == fStandaloneSymbol) {
456 if (!sawSpace) {
457 reportFatalError("SpaceRequiredBeforeStandalone",
458 null);
459 }
460 standalone = fString.toString();
461 state = STATE_DONE;
462 if (!standalone.equals("yes") && !standalone.equals("no")) {
463 reportFatalError("SDDeclInvalid", new Object[] {standalone});
464 }
465 }
466 else {
467 reportFatalError("EncodingDeclRequired", null);
468 }
469 break;
470 }
471 default: {
472 reportFatalError("NoMorePseudoAttributes", null);
473 }
474 }
475 sawSpace = fEntityScanner.skipDeclSpaces();
476 }
477 // restore original literal value
478 if(currLiteral)
479 currEnt.literal = true;
480 // REVISIT: should we remove this error reporting?
481 if (scanningTextDecl && state != STATE_DONE) {
482 reportFatalError("MorePseudoAttributes", null);
483 }
484
485 // If there is no data in the xml or text decl then we fail to report error
486 // for version or encoding info above.
487 if (scanningTextDecl) {
488 if (!dataFoundForTarget && encoding == null) {
489 reportFatalError("EncodingDeclRequired", null);
490 }
491 }
492 else {
493 if (!dataFoundForTarget && version == null) {
494 reportFatalError("VersionInfoRequired", null);
495 }
496 }
497
498 // end
499 if (!fEntityScanner.skipChar('?')) {
500 reportFatalError("XMLDeclUnterminated", null);
501 }
502 if (!fEntityScanner.skipChar('>')) {
503 reportFatalError("XMLDeclUnterminated", null);
504
505 }
506
507 // fill in return array
508 pseudoAttributeValues[0] = version;
509 pseudoAttributeValues[1] = encoding;
510 pseudoAttributeValues[2] = standalone;
511
512 } // scanXMLDeclOrTextDecl(boolean)
513
514 /**
515 * Scans a pseudo attribute.
516 *
517 * @param scanningTextDecl True if scanning this pseudo-attribute for a
518 * TextDecl; false if scanning XMLDecl. This
519 * flag is needed to report the correct type of
520 * error.
521 * @param value The string to fill in with the attribute
522 * value.
523 *
524 * @return The name of the attribute
525 *
526 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
527 * at the time of calling is lost.
528 */
529 public String scanPseudoAttribute(boolean scanningTextDecl,
530 XMLString value)
531 throws IOException, XNIException {
532
533 // REVISIT: This method is used for generic scanning of
534 // pseudo attributes, but since there are only three such
535 // attributes: version, encoding, and standalone there are
536 // for performant ways of scanning them. Every decl must
537 // have a version, and in TextDecls this version must
538 // be followed by an encoding declaration. Also the
539 // methods we invoke on the scanners allow non-ASCII
540 // characters to be parsed in the decls, but since
541 // we don't even know what the actual encoding of the
542 // document is until we scan the encoding declaration
543 // you cannot reliably read any characters outside
544 // of the ASCII range here. -- mrglavas
545 String name = fEntityScanner.scanName();
546 XMLEntityManager.print(fEntityManager.getCurrentEntity());
547 if (name == null) {
548 reportFatalError("PseudoAttrNameExpected", null);
549 }
550 fEntityScanner.skipDeclSpaces();
551 if (!fEntityScanner.skipChar('=')) {
552 reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
553 : "EqRequiredInXMLDecl", new Object[]{name});
554 }
555 fEntityScanner.skipDeclSpaces();
556 int quote = fEntityScanner.peekChar();
557 if (quote != '\'' && quote != '"') {
558 reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
559 : "QuoteRequiredInXMLDecl" , new Object[]{name});
560 }
561 fEntityScanner.scanChar();
562 int c = fEntityScanner.scanLiteral(quote, value);
563 if (c != quote) {
564 fStringBuffer2.clear();
565 do {
566 fStringBuffer2.append(value);
567 if (c != -1) {
568 if (c == '&' || c == '%' || c == '<' || c == ']') {
569 fStringBuffer2.append((char)fEntityScanner.scanChar());
570 }
571 // REVISIT: Even if you could reliably read non-ASCII chars
572 // why bother scanning for surrogates here? Only ASCII chars
573 // match the productions in XMLDecls and TextDecls. -- mrglavas
574 else if (XMLChar.isHighSurrogate(c)) {
575 scanSurrogates(fStringBuffer2);
576 }
577 else if (isInvalidLiteral(c)) {
578 String key = scanningTextDecl
579 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
580 reportFatalError(key,
581 new Object[] {Integer.toString(c, 16)});
582 fEntityScanner.scanChar();
583 }
584 }
585 c = fEntityScanner.scanLiteral(quote, value);
586 } while (c != quote);
587 fStringBuffer2.append(value);
588 value.setValues(fStringBuffer2);
589 }
590 if (!fEntityScanner.skipChar(quote)) {
591 reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
592 : "CloseQuoteMissingInXMLDecl",
593 new Object[]{name});
594 }
595
596 // return
597 return name;
598
599 } // scanPseudoAttribute(XMLString):String
600
601 /**
602 * Scans a processing instruction.
603 * <p>
604 * <pre>
605 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
606 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
607 * </pre>
608 * <strong>Note:</strong> This method uses fString, anything in it
609 * at the time of calling is lost.
610 */
611 protected void scanPI() throws IOException, XNIException {
612
613 // target
614 fReportEntity = false;
615 String target = null;
616 if(fNamespaces) {
617 target = fEntityScanner.scanNCName();
618 } else {
619 target = fEntityScanner.scanName();
620 }
621 if (target == null) {
622 reportFatalError("PITargetRequired", null);
623 }
624
625 // scan data
626 scanPIData(target, fString);
627 fReportEntity = true;
628
629 } // scanPI()
630
631 /**
     * Scans the data of a processing instruction. This is needed to handle the situation
633 * where a document starts with a processing instruction whose
634 * target name <em>starts with</em> "xml". (e.g. xmlfoo)
635 *
636 * <strong>Note:</strong> This method uses fStringBuffer, anything in it
637 * at the time of calling is lost.
638 *
639 * @param target The PI target
640 * @param data The string to fill in with the data
641 */
642 protected void scanPIData(String target, XMLString data)
643 throws IOException, XNIException {
644
645 // check target
646 if (target.length() == 3) {
647 char c0 = Character.toLowerCase(target.charAt(0));
648 char c1 = Character.toLowerCase(target.charAt(1));
649 char c2 = Character.toLowerCase(target.charAt(2));
650 if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
651 reportFatalError("ReservedPITarget", null);
652 }
653 }
654
655 // spaces
656 if (!fEntityScanner.skipSpaces()) {
657 if (fEntityScanner.skipString("?>")) {
658 // we found the end, there is no data
659 data.clear();
660 return;
661 }
662 else {
663 if(fNamespaces && fEntityScanner.peekChar() == ':') {
664 fEntityScanner.scanChar();
665 XMLStringBuffer colonName = new XMLStringBuffer(target);
666 colonName.append(":");
667 String str = fEntityScanner.scanName();
668 if (str != null)
669 colonName.append(str);
670 reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
671 fEntityScanner.skipSpaces();
672 } else {
673 // if there is data there should be some space
674 reportFatalError("SpaceRequiredInPI", null);
675 }
676 }
677 }
678
679 fStringBuffer.clear();
680 // data
681 if (fEntityScanner.scanData("?>", fStringBuffer)) {
682 do {
683 int c = fEntityScanner.peekChar();
684 if (c != -1) {
685 if (XMLChar.isHighSurrogate(c)) {
686 scanSurrogates(fStringBuffer);
687 }
688 else if (isInvalidLiteral(c)) {
689 reportFatalError("InvalidCharInPI",
690 new Object[]{Integer.toHexString(c)});
691 fEntityScanner.scanChar();
692 }
693 }
694 } while (fEntityScanner.scanData("?>", fStringBuffer));
695 }
696 data.setValues(fStringBuffer);
697
698 } // scanPIData(String,XMLString)
699
700 /**
701 * Scans a comment.
702 * <p>
703 * <pre>
704 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
705 * </pre>
706 * <p>
707 * <strong>Note:</strong> Called after scanning past '<!--'
708 * <strong>Note:</strong> This method uses fString, anything in it
709 * at the time of calling is lost.
710 *
711 * @param text The buffer to fill in with the text.
712 */
713 protected void scanComment(XMLStringBuffer text)
714 throws IOException, XNIException {
715
716 // text
717 // REVISIT: handle invalid character, eof
718 text.clear();
719 while (fEntityScanner.scanData("--", text)) {
720 int c = fEntityScanner.peekChar();
721 if (c != -1) {
722 if (XMLChar.isHighSurrogate(c)) {
723 scanSurrogates(text);
724 }
725 else if (isInvalidLiteral(c)) {
726 reportFatalError("InvalidCharInComment",
727 new Object[] { Integer.toHexString(c) });
728 fEntityScanner.scanChar();
729 }
730 }
731 }
732 if (!fEntityScanner.skipChar('>')) {
733 reportFatalError("DashDashInComment", null);
734 }
735
736 } // scanComment()
737
738 /**
739 * Scans an attribute value and normalizes whitespace converting all
740 * whitespace characters to space characters.
741 *
742 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
743 *
744 * @param value The XMLString to fill in with the value.
745 * @param nonNormalizedValue The XMLString to fill in with the
746 * non-normalized value.
747 * @param atName The name of the attribute being parsed (for error msgs).
748 * @param checkEntities true if undeclared entities should be reported as VC violation,
749 * false if undeclared entities should be reported as WFC violation.
750 * @param eleName The name of element to which this attribute belongs.
751 *
752 * @return true if the non-normalized and normalized value are the same
753 *
     * <strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2
     * and fStringBuffer3, anything in them at the time of calling is lost.
756 **/
757 protected boolean scanAttributeValue(XMLString value,
758 XMLString nonNormalizedValue,
759 String atName,
760 boolean checkEntities,String eleName)
761 throws IOException, XNIException
762 {
763 // quote
764 int quote = fEntityScanner.peekChar();
765 if (quote != '\'' && quote != '"') {
766 reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
767 }
768
769 fEntityScanner.scanChar();
770 int entityDepth = fEntityDepth;
771
772 int c = fEntityScanner.scanLiteral(quote, value);
773 if (DEBUG_ATTR_NORMALIZATION) {
774 System.out.println("** scanLiteral -> \""
775 + value.toString() + "\"");
776 }
777
778 int fromIndex = 0;
779 if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
780 /** Both the non-normalized and normalized attribute values are equal. **/
781 nonNormalizedValue.setValues(value);
782 int cquote = fEntityScanner.scanChar();
783 if (cquote != quote) {
784 reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
785 }
786 return true;
787 }
788 fStringBuffer2.clear();
789 fStringBuffer2.append(value);
790 normalizeWhitespace(value, fromIndex);
791 if (DEBUG_ATTR_NORMALIZATION) {
792 System.out.println("** normalizeWhitespace -> \""
793 + value.toString() + "\"");
794 }
795 if (c != quote) {
796 fScanningAttribute = true;
797 fStringBuffer.clear();
798 do {
799 fStringBuffer.append(value);
800 if (DEBUG_ATTR_NORMALIZATION) {
801 System.out.println("** value2: \""
802 + fStringBuffer.toString() + "\"");
803 }
804 if (c == '&') {
805 fEntityScanner.skipChar('&');
806 if (entityDepth == fEntityDepth) {
807 fStringBuffer2.append('&');
808 }
809 if (fEntityScanner.skipChar('#')) {
810 if (entityDepth == fEntityDepth) {
811 fStringBuffer2.append('#');
812 }
813 int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
814 if (ch != -1) {
815 if (DEBUG_ATTR_NORMALIZATION) {
816 System.out.println("** value3: \""
817 + fStringBuffer.toString()
818 + "\"");
819 }
820 }
821 }
822 else {
823 String entityName = fEntityScanner.scanName();
824 if (entityName == null) {
825 reportFatalError("NameRequiredInReference", null);
826 }
827 else if (entityDepth == fEntityDepth) {
828 fStringBuffer2.append(entityName);
829 }
830 if (!fEntityScanner.skipChar(';')) {
831 reportFatalError("SemicolonRequiredInReference",
832 new Object []{entityName});
833 }
834 else if (entityDepth == fEntityDepth) {
835 fStringBuffer2.append(';');
836 }
837 if (entityName == fAmpSymbol) {
838 fStringBuffer.append('&');
839 if (DEBUG_ATTR_NORMALIZATION) {
840 System.out.println("** value5: \""
841 + fStringBuffer.toString()
842 + "\"");
843 }
844 }
845 else if (entityName == fAposSymbol) {
846 fStringBuffer.append('\'');
847 if (DEBUG_ATTR_NORMALIZATION) {
848 System.out.println("** value7: \""
849 + fStringBuffer.toString()
850 + "\"");
851 }
852 }
853 else if (entityName == fLtSymbol) {
854 fStringBuffer.append('<');
855 if (DEBUG_ATTR_NORMALIZATION) {
856 System.out.println("** value9: \""
857 + fStringBuffer.toString()
858 + "\"");
859 }
860 }
861 else if (entityName == fGtSymbol) {
862 fStringBuffer.append('>');
863 if (DEBUG_ATTR_NORMALIZATION) {
864 System.out.println("** valueB: \""
865 + fStringBuffer.toString()
866 + "\"");
867 }
868 }
869 else if (entityName == fQuotSymbol) {
870 fStringBuffer.append('"');
871 if (DEBUG_ATTR_NORMALIZATION) {
872 System.out.println("** valueD: \""
873 + fStringBuffer.toString()
874 + "\"");
875 }
876 }
877 else {
878 if (fEntityManager.isExternalEntity(entityName)) {
879 reportFatalError("ReferenceToExternalEntity",
880 new Object[] { entityName });
881 }
882 else {
883 if (!fEntityManager.isDeclaredEntity(entityName)) {
884 //WFC & VC: Entity Declared
885 if (checkEntities) {
886 if (fValidation) {
887 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
888 "EntityNotDeclared",
889 new Object[]{entityName},
890 XMLErrorReporter.SEVERITY_ERROR);
891 }
892 }
893 else {
894 reportFatalError("EntityNotDeclared",
895 new Object[]{entityName});
896 }
897 }
898 fEntityManager.startEntity(entityName, true);
899 }
900 }
901 }
902 }
903 else if (c == '<') {
904 reportFatalError("LessthanInAttValue",
905 new Object[] { eleName, atName });
906 fEntityScanner.scanChar();
907 if (entityDepth == fEntityDepth) {
908 fStringBuffer2.append((char)c);
909 }
910 }
911 else if (c == '%' || c == ']') {
912 fEntityScanner.scanChar();
913 fStringBuffer.append((char)c);
914 if (entityDepth == fEntityDepth) {
915 fStringBuffer2.append((char)c);
916 }
917 if (DEBUG_ATTR_NORMALIZATION) {
918 System.out.println("** valueF: \""
919 + fStringBuffer.toString() + "\"");
920 }
921 }
922 else if (c == '\n' || c == '\r') {
923 fEntityScanner.scanChar();
924 fStringBuffer.append(' ');
925 if (entityDepth == fEntityDepth) {
926 fStringBuffer2.append('\n');
927 }
928 }
929 else if (c != -1 && XMLChar.isHighSurrogate(c)) {
930 fStringBuffer3.clear();
931 if (scanSurrogates(fStringBuffer3)) {
932 fStringBuffer.append(fStringBuffer3);
933 if (entityDepth == fEntityDepth) {
934 fStringBuffer2.append(fStringBuffer3);
935 }
936 if (DEBUG_ATTR_NORMALIZATION) {
937 System.out.println("** valueI: \""
938 + fStringBuffer.toString()
939 + "\"");
940 }
941 }
942 }
943 else if (c != -1 && isInvalidLiteral(c)) {
944 reportFatalError("InvalidCharInAttValue",
945 new Object[] {eleName, atName, Integer.toString(c, 16)});
946 fEntityScanner.scanChar();
947 if (entityDepth == fEntityDepth) {
948 fStringBuffer2.append((char)c);
949 }
950 }
951 c = fEntityScanner.scanLiteral(quote, value);
952 if (entityDepth == fEntityDepth) {
953 fStringBuffer2.append(value);
954 }
955 normalizeWhitespace(value);
956 } while (c != quote || entityDepth != fEntityDepth);
957 fStringBuffer.append(value);
958 if (DEBUG_ATTR_NORMALIZATION) {
959 System.out.println("** valueN: \""
960 + fStringBuffer.toString() + "\"");
961 }
962 value.setValues(fStringBuffer);
963 fScanningAttribute = false;
964 }
965 nonNormalizedValue.setValues(fStringBuffer2);
966
967 // quote
968 int cquote = fEntityScanner.scanChar();
969 if (cquote != quote) {
970 reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
971 }
972 return nonNormalizedValue.equals(value.ch, value.offset, value.length);
973
974 } // scanAttributeValue()
975
976
977 /**
978 * Scans External ID and return the public and system IDs.
979 *
980 * @param identifiers An array of size 2 to return the system id,
981 * and public id (in that order).
982 * @param optionalSystemId Specifies whether the system id is optional.
983 *
984 * <strong>Note:</strong> This method uses fString and fStringBuffer,
985 * anything in them at the time of calling is lost.
986 */
987 protected void scanExternalID(String[] identifiers,
988 boolean optionalSystemId)
989 throws IOException, XNIException {
990
991 String systemId = null;
992 String publicId = null;
993 if (fEntityScanner.skipString("PUBLIC")) {
994 if (!fEntityScanner.skipSpaces()) {
995 reportFatalError("SpaceRequiredAfterPUBLIC", null);
996 }
997 scanPubidLiteral(fString);
998 publicId = fString.toString();
999
1000 if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1001 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1002 }
1003 }
1004
1005 if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1006 if (publicId == null && !fEntityScanner.skipSpaces()) {
1007 reportFatalError("SpaceRequiredAfterSYSTEM", null);
1008 }
1009 int quote = fEntityScanner.peekChar();
1010 if (quote != '\'' && quote != '"') {
1011 if (publicId != null && optionalSystemId) {
1012 // looks like we don't have any system id
1013 // simply return the public id
1014 identifiers[0] = null;
1015 identifiers[1] = publicId;
1016 return;
1017 }
1018 reportFatalError("QuoteRequiredInSystemID", null);
1019 }
1020 fEntityScanner.scanChar();
1021 XMLString ident = fString;
1022 if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1023 fStringBuffer.clear();
1024 do {
1025 fStringBuffer.append(ident);
1026 int c = fEntityScanner.peekChar();
1027 if (XMLChar.isMarkup(c) || c == ']') {
1028 fStringBuffer.append((char)fEntityScanner.scanChar());
1029 }
1030 } while (fEntityScanner.scanLiteral(quote, ident) != quote);
1031 fStringBuffer.append(ident);
1032 ident = fStringBuffer;
1033 }
1034 systemId = ident.toString();
1035 if (!fEntityScanner.skipChar(quote)) {
1036 reportFatalError("SystemIDUnterminated", null);
1037 }
1038 }
1039
1040 // store result in array
1041 identifiers[0] = systemId;
1042 identifiers[1] = publicId;
1043 }
1044
1045
1046 /**
1047 * Scans public ID literal.
1048 *
1049 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1050 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1051 *
1052 * The returned string is normalized according to the following rule,
1053 * from http://www.w3.org/TR/REC-xml#dt-pubid:
1054 *
1055 * Before a match is attempted, all strings of white space in the public
1056 * identifier must be normalized to single space characters (#x20), and
1057 * leading and trailing white space must be removed.
1058 *
1059 * @param literal The string to fill in with the public ID literal.
1060 * @return True on success.
1061 *
1062 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1063 * the time of calling is lost.
1064 */
1065 protected boolean scanPubidLiteral(XMLString literal)
1066 throws IOException, XNIException
1067 {
1068 int quote = fEntityScanner.scanChar();
1069 if (quote != '\'' && quote != '"') {
1070 reportFatalError("QuoteRequiredInPublicID", null);
1071 return false;
1072 }
1073
1074 fStringBuffer.clear();
1075 // skip leading whitespace
1076 boolean skipSpace = true;
1077 boolean dataok = true;
1078 while (true) {
1079 int c = fEntityScanner.scanChar();
1080 if (c == ' ' || c == '\n' || c == '\r') {
1081 if (!skipSpace) {
1082 // take the first whitespace as a space and skip the others
1083 fStringBuffer.append(' ');
1084 skipSpace = true;
1085 }
1086 }
1087 else if (c == quote) {
1088 if (skipSpace) {
1089 // if we finished on a space let's trim it
1090 fStringBuffer.length--;
1091 }
1092 literal.setValues(fStringBuffer);
1093 break;
1094 }
1095 else if (XMLChar.isPubid(c)) {
1096 fStringBuffer.append((char)c);
1097 skipSpace = false;
1098 }
1099 else if (c == -1) {
1100 reportFatalError("PublicIDUnterminated", null);
1101 return false;
1102 }
1103 else {
1104 dataok = false;
1105 reportFatalError("InvalidCharInPublicID",
1106 new Object[]{Integer.toHexString(c)});
1107 }
1108 }
1109 return dataok;
1110 }
1111
1112
1113 /**
1114 * Normalize whitespace in an XMLString converting all whitespace
1115 * characters to space characters.
1116 */
1117 protected void normalizeWhitespace(XMLString value) {
1118 int end = value.offset + value.length;
1119 for (int i = value.offset; i < end; ++i) {
1120 int c = value.ch[i];
1121 // Performance: For XML 1.0 documents take advantage of
1122 // the fact that the only legal characters below 0x20
1123 // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1124 // already determined the well-formedness of these
1125 // characters it is sufficient (and safe) to check
1126 // against 0x20. -- mrglavas
1127 if (c < 0x20) {
1128 value.ch[i] = ' ';
1129 }
1130 }
1131 }
1132
1133 /**
1134 * Normalize whitespace in an XMLString converting all whitespace
1135 * characters to space characters.
1136 */
1137 protected void normalizeWhitespace(XMLString value, int fromIndex) {
1138 int end = value.offset + value.length;
1139 for (int i = value.offset + fromIndex; i < end; ++i) {
1140 int c = value.ch[i];
1141 // Performance: For XML 1.0 documents take advantage of
1142 // the fact that the only legal characters below 0x20
1143 // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1144 // already determined the well-formedness of these
1145 // characters it is sufficient (and safe) to check
1146 // against 0x20. -- mrglavas
1147 if (c < 0x20) {
1148 value.ch[i] = ' ';
1149 }
1150 }
1151 }
1152
1153 /**
1154 * Checks whether this string would be unchanged by normalization.
1155 *
1156 * @return -1 if the value would be unchanged by normalization,
1157 * otherwise the index of the first whitespace character which
1158 * would be transformed.
1159 */
1160 protected int isUnchangedByNormalization(XMLString value) {
1161 int end = value.offset + value.length;
1162 for (int i = value.offset; i < end; ++i) {
1163 int c = value.ch[i];
1164 // Performance: For XML 1.0 documents take advantage of
1165 // the fact that the only legal characters below 0x20
1166 // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1167 // already determined the well-formedness of these
1168 // characters it is sufficient (and safe) to check
1169 // against 0x20. -- mrglavas
1170 if (c < 0x20) {
1171 return i - value.offset;
1172 }
1173 }
1174 return -1;
1175 }
1176
1177 //
1178 // XMLEntityHandler methods
1179 //
1180
1181 /**
1182 * This method notifies of the start of an entity. The document entity
1183 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1184 * parameter entity names start with '%'; and general entities are just
1185 * specified by their name.
1186 *
1187 * @param name The name of the entity.
1188 * @param identifier The resource identifier.
1189 * @param encoding The auto-detected IANA encoding name of the entity
1190 * stream. This value will be null in those situations
1191 * where the entity encoding is not auto-detected (e.g.
1192 * internal entities or a document entity that is
1193 * parsed from a java.io.Reader).
1194 * @param augs Additional information that may include infoset augmentations
1195 *
1196 * @throws XNIException Thrown by handler to signal an error.
1197 */
1198 public void startEntity(String name,
1199 XMLResourceIdentifier identifier,
1200 String encoding, Augmentations augs) throws XNIException {
1201
1202 // keep track of the entity depth
1203 fEntityDepth++;
1204 // must reset entity scanner
1205 fEntityScanner = fEntityManager.getEntityScanner();
1206
1207 } // startEntity(String,XMLResourceIdentifier,String)
1208
1209 /**
1210 * This method notifies the end of an entity. The document entity has
1211 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1212 * parameter entity names start with '%'; and general entities are just
1213 * specified by their name.
1214 *
1215 * @param name The name of the entity.
1216 * @param augs Additional information that may include infoset augmentations
1217 *
1218 * @throws XNIException Thrown by handler to signal an error.
1219 */
1220 public void endEntity(String name, Augmentations augs) throws XNIException {
1221
1222 // keep track of the entity depth
1223 fEntityDepth--;
1224
1225 } // endEntity(String)
1226
1227 /**
1228 * Scans a character reference and append the corresponding chars to the
1229 * specified buffer.
1230 *
1231 * <p>
1232 * <pre>
1233 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1234 * </pre>
1235 *
1236 * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1237 * at the time of calling is lost.
1238 *
1239 * @param buf the character buffer to append chars to
1240 * @param buf2 the character buffer to append non-normalized chars to
1241 *
1242 * @return the character value or (-1) on conversion failure
1243 */
1244 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1245 throws IOException, XNIException {
1246
1247 // scan hexadecimal value
1248 boolean hex = false;
1249 if (fEntityScanner.skipChar('x')) {
1250 if (buf2 != null) { buf2.append('x'); }
1251 hex = true;
1252 fStringBuffer3.clear();
1253 boolean digit = true;
1254
1255 int c = fEntityScanner.peekChar();
1256 digit = (c >= '0' && c <= '9') ||
1257 (c >= 'a' && c <= 'f') ||
1258 (c >= 'A' && c <= 'F');
1259 if (digit) {
1260 if (buf2 != null) { buf2.append((char)c); }
1261 fEntityScanner.scanChar();
1262 fStringBuffer3.append((char)c);
1263
1264 do {
1265 c = fEntityScanner.peekChar();
1266 digit = (c >= '0' && c <= '9') ||
1267 (c >= 'a' && c <= 'f') ||
1268 (c >= 'A' && c <= 'F');
1269 if (digit) {
1270 if (buf2 != null) { buf2.append((char)c); }
1271 fEntityScanner.scanChar();
1272 fStringBuffer3.append((char)c);
1273 }
1274 } while (digit);
1275 }
1276 else {
1277 reportFatalError("HexdigitRequiredInCharRef", null);
1278 }
1279 }
1280
1281 // scan decimal value
1282 else {
1283 fStringBuffer3.clear();
1284 boolean digit = true;
1285
1286 int c = fEntityScanner.peekChar();
1287 digit = c >= '0' && c <= '9';
1288 if (digit) {
1289 if (buf2 != null) { buf2.append((char)c); }
1290 fEntityScanner.scanChar();
1291 fStringBuffer3.append((char)c);
1292
1293 do {
1294 c = fEntityScanner.peekChar();
1295 digit = c >= '0' && c <= '9';
1296 if (digit) {
1297 if (buf2 != null) { buf2.append((char)c); }
1298 fEntityScanner.scanChar();
1299 fStringBuffer3.append((char)c);
1300 }
1301 } while (digit);
1302 }
1303 else {
1304 reportFatalError("DigitRequiredInCharRef", null);
1305 }
1306 }
1307
1308 // end
1309 if (!fEntityScanner.skipChar(';')) {
1310 reportFatalError("SemicolonRequiredInCharRef", null);
1311 }
1312 if (buf2 != null) { buf2.append(';'); }
1313
1314 // convert string to number
1315 int value = -1;
1316 try {
1317 value = Integer.parseInt(fStringBuffer3.toString(),
1318 hex ? 16 : 10);
1319
1320 // character reference must be a valid XML character
1321 if (isInvalid(value)) {
1322 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1323 if (hex) errorBuf.append('x');
1324 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1325 reportFatalError("InvalidCharRef",
1326 new Object[]{errorBuf.toString()});
1327 }
1328 }
1329 catch (NumberFormatException e) {
1330 // Conversion failed, let -1 value drop through.
1331 // If we end up here, the character reference was invalid.
1332 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1333 if (hex) errorBuf.append('x');
1334 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1335 reportFatalError("InvalidCharRef",
1336 new Object[]{errorBuf.toString()});
1337 }
1338
1339 // append corresponding chars to the given buffer
1340 if (!XMLChar.isSupplemental(value)) {
1341 buf.append((char) value);
1342 }
1343 else {
1344 // character is supplemental, split it into surrogate chars
1345 buf.append(XMLChar.highSurrogate(value));
1346 buf.append(XMLChar.lowSurrogate(value));
1347 }
1348
1349 // char refs notification code
1350 if (fNotifyCharRefs && value != -1) {
1351 String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1352 if (!fScanningAttribute) {
1353 fCharRefLiteral = literal;
1354 }
1355 }
1356
1357 return value;
1358 }
1359
1360 // returns true if the given character is not
1361 // valid with respect to the version of
1362 // XML understood by this scanner.
1363 protected boolean isInvalid(int value) {
1364 return (XMLChar.isInvalid(value));
1365 } // isInvalid(int): boolean
1366
1367 // returns true if the given character is not
1368 // valid or may not be used outside a character reference
1369 // with respect to the version of XML understood by this scanner.
1370 protected boolean isInvalidLiteral(int value) {
1371 return (XMLChar.isInvalid(value));
1372 } // isInvalidLiteral(int): boolean
1373
1374 // returns true if the given character is
1375 // a valid nameChar with respect to the version of
1376 // XML understood by this scanner.
1377 protected boolean isValidNameChar(int value) {
1378 return (XMLChar.isName(value));
1379 } // isValidNameChar(int): boolean
1380
1381 // returns true if the given character is
1382 // a valid nameStartChar with respect to the version of
1383 // XML understood by this scanner.
1384 protected boolean isValidNameStartChar(int value) {
1385 return (XMLChar.isNameStart(value));
1386 } // isValidNameStartChar(int): boolean
1387
1388 // returns true if the given character is
1389 // a valid NCName character with respect to the version of
1390 // XML understood by this scanner.
1391 protected boolean isValidNCName(int value) {
1392 return (XMLChar.isNCName(value));
1393 } // isValidNCName(int): boolean
1394
1395 // returns true if the given character is
1396 // a valid high surrogate for a nameStartChar
1397 // with respect to the version of XML understood
1398 // by this scanner.
1399 protected boolean isValidNameStartHighSurrogate(int value) {
1400 return false;
1401 } // isValidNameStartHighSurrogate(int): boolean
1402
1403 protected boolean versionSupported(String version ) {
1404 return version.equals("1.0");
1405 } // version Supported
1406
1407 // returns the error message key for unsupported
1408 // versions of XML with respect to the version of
1409 // XML understood by this scanner.
1410 protected String getVersionNotSupportedKey () {
1411 return "VersionNotSupported";
1412 } // getVersionNotSupportedKey: String
1413
1414 /**
1415 * Scans surrogates and append them to the specified buffer.
1416 * <p>
1417 * <strong>Note:</strong> This assumes the current char has already been
1418 * identified as a high surrogate.
1419 *
1420 * @param buf The StringBuffer to append the read surrogates to.
1421 * @return True if it succeeded.
1422 */
1423 protected boolean scanSurrogates(XMLStringBuffer buf)
1424 throws IOException, XNIException {
1425
1426 int high = fEntityScanner.scanChar();
1427 int low = fEntityScanner.peekChar();
1428 if (!XMLChar.isLowSurrogate(low)) {
1429 reportFatalError("InvalidCharInContent",
1430 new Object[] {Integer.toString(high, 16)});
1431 return false;
1432 }
1433 fEntityScanner.scanChar();
1434
1435 // convert surrogates to supplemental character
1436 int c = XMLChar.supplemental((char)high, (char)low);
1437
1438 // supplemental character must be a valid XML character
1439 if (isInvalid(c)) {
1440 reportFatalError("InvalidCharInContent",
1441 new Object[]{Integer.toString(c, 16)});
1442 return false;
1443 }
1444
1445 // fill in the buffer
1446 buf.append((char)high);
1447 buf.append((char)low);
1448
1449 return true;
1450
1451 } // scanSurrogates():boolean
1452
1453
1454 /**
1455 * Convenience function used in all XML scanners.
1456 */
1457 protected void reportFatalError(String msgId, Object[] args)
1458 throws XNIException {
1459 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1460 msgId, args,
1461 XMLErrorReporter.SEVERITY_FATAL_ERROR);
1462 }
1463
1464 // private methods
1465 private void init() {
1466 fEntityScanner = null;
1467 // initialize vars
1468 fEntityDepth = 0;
1469 fReportEntity = true;
1470 fResourceIdentifier.clear();
1471 }
1472
1473 } // class XMLScanner