1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl;
19
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.InputStreamReader;
24 import java.io.OutputStream;
25 import java.io.Reader;
26 import java.io.StringReader;
27 import java.lang.reflect.Method;
28 import java.net.HttpURLConnection;
29 import java.net.URL;
30 import java.net.URLConnection;
31 import java.security.AccessController;
32 import java.security.PrivilegedAction;
33 import java.util.Hashtable;
34 import java.util.Iterator;
35 import java.util.Locale;
36 import java.util.Map;
37 import java.util.Stack;
38 import java.util.StringTokenizer;
39
40 import org.apache.xerces.impl.io.ASCIIReader;
41 import org.apache.xerces.impl.io.Latin1Reader;
42 import org.apache.xerces.impl.io.UCSReader;
43 import org.apache.xerces.impl.io.UTF8Reader;
44 import org.apache.xerces.impl.msg.XMLMessageFormatter;
45 import org.apache.xerces.impl.validation.ValidationManager;
46 import org.apache.xerces.util.AugmentationsImpl;
47 import org.apache.xerces.util.EncodingMap;
48 import org.apache.xerces.util.HTTPInputSource;
49 import org.apache.xerces.util.SecurityManager;
50 import org.apache.xerces.util.SymbolTable;
51 import org.apache.xerces.util.URI;
52 import org.apache.xerces.util.XMLChar;
53 import org.apache.xerces.util.XMLEntityDescriptionImpl;
54 import org.apache.xerces.util.XMLResourceIdentifierImpl;
55 import org.apache.xerces.xni.Augmentations;
56 import org.apache.xerces.xni.XMLResourceIdentifier;
57 import org.apache.xerces.xni.XNIException;
58 import org.apache.xerces.xni.parser.XMLComponent;
59 import org.apache.xerces.xni.parser.XMLComponentManager;
60 import org.apache.xerces.xni.parser.XMLConfigurationException;
61 import org.apache.xerces.xni.parser.XMLEntityResolver;
62 import org.apache.xerces.xni.parser.XMLInputSource;
63
64 /**
65 * The entity manager handles the registration of general and parameter
66 * entities; resolves entities; and starts entities. The entity manager
67 * is a central component in a standard parser configuration and this
68 * class works directly with the entity scanner to manage the underlying
69 * xni.
70 * <p>
71 * This component requires the following features and properties from the
72 * component manager that uses it:
73 * <ul>
74 * <li>http://xml.org/sax/features/validation</li>
75 * <li>http://xml.org/sax/features/external-general-entities</li>
76 * <li>http://xml.org/sax/features/external-parameter-entities</li>
77 * <li>http://apache.org/xml/features/allow-java-encodings</li>
78 * <li>http://apache.org/xml/properties/internal/symbol-table</li>
79 * <li>http://apache.org/xml/properties/internal/error-reporter</li>
80 * <li>http://apache.org/xml/properties/internal/entity-resolver</li>
81 * </ul>
82 *
83 * @xerces.internal
84 *
85 * @author Andy Clark, IBM
86 * @author Arnaud Le Hors, IBM
87 *
88 * @version $Id: XMLEntityManager.java 572040 2007-09-02 17:24:29Z mrglavas $
89 */
90 public class XMLEntityManager
91 implements XMLComponent, XMLEntityResolver {
92
93 //
94 // Constants
95 //
96
97 /** Default buffer size (2048). */
98 public static final int DEFAULT_BUFFER_SIZE = 2048;
99
/** Default buffer size used until the XMLDecl has been processed (64). */
101 public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
102
103 /** Default internal entity buffer size (512). */
104 public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 512;
105
106 // feature identifiers
107
108 /** Feature identifier: validation. */
109 protected static final String VALIDATION =
110 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
111
112 /** Feature identifier: external general entities. */
113 protected static final String EXTERNAL_GENERAL_ENTITIES =
114 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;
115
116 /** Feature identifier: external parameter entities. */
117 protected static final String EXTERNAL_PARAMETER_ENTITIES =
118 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;
119
120 /** Feature identifier: allow Java encodings. */
121 protected static final String ALLOW_JAVA_ENCODINGS =
122 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
123
124 /** Feature identifier: warn on duplicate EntityDef */
125 protected static final String WARN_ON_DUPLICATE_ENTITYDEF =
126 Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;
127
128 /** Feature identifier: standard uri conformant */
129 protected static final String STANDARD_URI_CONFORMANT =
130 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;
131
132 protected static final String PARSER_SETTINGS =
133 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
134
135 // property identifiers
136
137 /** Property identifier: symbol table. */
138 protected static final String SYMBOL_TABLE =
139 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
140
141 /** Property identifier: error reporter. */
142 protected static final String ERROR_REPORTER =
143 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
144
145 /** Property identifier: entity resolver. */
146 protected static final String ENTITY_RESOLVER =
147 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
148
149 // property identifier: ValidationManager
150 protected static final String VALIDATION_MANAGER =
151 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;
152
153 /** property identifier: buffer size. */
154 protected static final String BUFFER_SIZE =
155 Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;
156
157 /** property identifier: security manager. */
158 protected static final String SECURITY_MANAGER =
159 Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;
160
161 // recognized features and properties
162
163 /** Recognized features. */
164 private static final String[] RECOGNIZED_FEATURES = {
165 VALIDATION,
166 EXTERNAL_GENERAL_ENTITIES,
167 EXTERNAL_PARAMETER_ENTITIES,
168 ALLOW_JAVA_ENCODINGS,
169 WARN_ON_DUPLICATE_ENTITYDEF,
170 STANDARD_URI_CONFORMANT
171 };
172
173 /** Feature defaults. */
174 private static final Boolean[] FEATURE_DEFAULTS = {
175 null,
176 Boolean.TRUE,
177 Boolean.TRUE,
178 Boolean.FALSE,
179 Boolean.FALSE,
180 Boolean.FALSE
181 };
182
183 /** Recognized properties. */
184 private static final String[] RECOGNIZED_PROPERTIES = {
185 SYMBOL_TABLE,
186 ERROR_REPORTER,
187 ENTITY_RESOLVER,
188 VALIDATION_MANAGER,
189 BUFFER_SIZE,
190 SECURITY_MANAGER,
191 };
192
193 /** Property defaults. */
194 private static final Object[] PROPERTY_DEFAULTS = {
195 null,
196 null,
197 null,
198 null,
199 new Integer(DEFAULT_BUFFER_SIZE),
200 null,
201 };
202
203 private static final String XMLEntity = "[xml]".intern();
204 private static final String DTDEntity = "[dtd]".intern();
205
206 // debugging
207
208 /**
209 * Debug printing of buffer. This debugging flag works best when you
210 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
211 * 64 characters.
212 */
213 private static final boolean DEBUG_BUFFER = false;
214
215 /** Debug some basic entities. */
216 private static final boolean DEBUG_ENTITIES = false;
217
218 /** Debug switching readers for encodings. */
219 private static final boolean DEBUG_ENCODINGS = false;
220
// Debug flag: whether trace messages should be displayed while resolving entities.
222 private static final boolean DEBUG_RESOLVER = false;
223
224 //
225 // Data
226 //
227
228 // features
229
230 /**
231 * Validation. This feature identifier is:
232 * http://xml.org/sax/features/validation
233 */
234 protected boolean fValidation;
235
236 /**
237 * External general entities. This feature identifier is:
238 * http://xml.org/sax/features/external-general-entities
239 */
240 protected boolean fExternalGeneralEntities = true;
241
242 /**
243 * External parameter entities. This feature identifier is:
244 * http://xml.org/sax/features/external-parameter-entities
245 */
246 protected boolean fExternalParameterEntities = true;
247
248 /**
249 * Allow Java encoding names. This feature identifier is:
250 * http://apache.org/xml/features/allow-java-encodings
251 */
252 protected boolean fAllowJavaEncodings;
253
254 /** warn on duplicate Entity declaration.
255 * http://apache.org/xml/features/warn-on-duplicate-entitydef
256 */
257 protected boolean fWarnDuplicateEntityDef;
258
259 /**
260 * standard uri conformant (strict uri).
261 * http://apache.org/xml/features/standard-uri-conformant
262 */
263 protected boolean fStrictURI;
264
265 // properties
266
267 /**
268 * Symbol table. This property identifier is:
269 * http://apache.org/xml/properties/internal/symbol-table
270 */
271 protected SymbolTable fSymbolTable;
272
273 /**
274 * Error reporter. This property identifier is:
275 * http://apache.org/xml/properties/internal/error-reporter
276 */
277 protected XMLErrorReporter fErrorReporter;
278
279 /**
280 * Entity resolver. This property identifier is:
281 * http://apache.org/xml/properties/internal/entity-resolver
282 */
283 protected XMLEntityResolver fEntityResolver;
284
285 /**
286 * Validation manager. This property identifier is:
287 * http://apache.org/xml/properties/internal/validation-manager
288 */
289 protected ValidationManager fValidationManager;
290
291 // settings
292
293 /**
294 * Buffer size. We get this value from a property. The default size
295 * is used if the input buffer size property is not specified.
296 * REVISIT: do we need a property for internal entity buffer size?
297 */
298 protected int fBufferSize = DEFAULT_BUFFER_SIZE;
299
300 // stores defaults for entity expansion limit if it has
301 // been set on the configuration.
302 protected SecurityManager fSecurityManager = null;
303
304 /**
305 * True if the document entity is standalone. This should really
306 * only be set by the document source (e.g. XMLDocumentScanner).
307 */
308 protected boolean fStandalone;
309
310 /**
311 * True if the current document contains parameter entity references.
312 */
313 protected boolean fHasPEReferences;
314
315 // are the entities being parsed in the external subset?
316 // NOTE: this *is not* the same as whether they're external entities!
317 protected boolean fInExternalSubset = false;
318
319 // handlers
320
321 /** Entity handler. */
322 protected XMLEntityHandler fEntityHandler;
323
324 // scanner
325
326 /** Current entity scanner. */
327 protected XMLEntityScanner fEntityScanner;
328
329 /** XML 1.0 entity scanner. */
330 protected XMLEntityScanner fXML10EntityScanner;
331
332 /** XML 1.1 entity scanner. */
333 protected XMLEntityScanner fXML11EntityScanner;
334
335 // entity expansion limit (contains useful data if and only if
336 // fSecurityManager is non-null)
337 protected int fEntityExpansionLimit = 0;
// number of entities expanded so far (counted per document):
339 protected int fEntityExpansionCount = 0;
340
341 // entities
342
343 /** Entities. */
344 protected final Hashtable fEntities = new Hashtable();
345
346 /** Entity stack. */
347 protected final Stack fEntityStack = new Stack();
348
349 /** Current entity. */
350 protected ScannedEntity fCurrentEntity;
351
352 // shared context
353
354 /** Shared declared entities. */
355 protected Hashtable fDeclaredEntities;
356
357 // temp vars
358
/** Resource identifier. */
360 private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
361
362 /** Augmentations for entities. */
363 private final Augmentations fEntityAugs = new AugmentationsImpl();
364
365 /** Pool of byte buffers. */
366 private final ByteBufferPool fByteBufferPool = new ByteBufferPool(fBufferSize);
367
368 /** Temporary storage for the current entity's byte buffer. */
369 private byte[] fTempByteBuffer = null;
370
371 /** Pool of character buffers. */
372 private final CharacterBufferPool fCharacterBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
373
374 //
375 // Constructors
376 //
377
/**
 * Creates an entity manager that does not share entity declarations
 * with any other entity manager.
 */
public XMLEntityManager() {
    this((XMLEntityManager) null);
} // <init>()
382
/**
 * Creates an entity manager that shares the entity declarations of
 * the given entity manager during each parse.
 * <p>
 * <strong>REVISIT:</strong> The "right" way to expose the list of
 * declared entities is still open. For now the knowledge of how to
 * reach the declarations is implicit.
 *
 * @param entityManager The manager whose declared entities are shared,
 *                      or null when nothing is shared.
 */
public XMLEntityManager(XMLEntityManager entityManager) {

    // remember the shared declarations, if a donor manager was supplied
    if (entityManager == null) {
        fDeclaredEntities = null;
    }
    else {
        fDeclaredEntities = entityManager.getDeclaredEntities();
    }

    // scanning starts out with the XML 1.0 version setting
    setScannerVersion(Constants.XML_VERSION_1_0);
} // <init>(XMLEntityManager)
399
400 //
401 // Public methods
402 //
403
404 /**
405 * Sets whether the document entity is standalone.
406 *
407 * @param standalone True if document entity is standalone.
408 */
409 public void setStandalone(boolean standalone) {
410 fStandalone = standalone;
411 } // setStandalone(boolean)
412
413 /** Returns true if the document entity is standalone. */
414 public boolean isStandalone() {
415 return fStandalone;
416 } // isStandalone():boolean
417
418 /**
419 * Notifies the entity manager that the current document
420 * being processed contains parameter entity references.
421 */
422 final void notifyHasPEReferences() {
423 fHasPEReferences = true;
424 } // notifyHasPEReferences
425
426 /**
427 * Returns true if the document contains parameter entity references.
428 */
429 final boolean hasPEReferences() {
430 return fHasPEReferences;
431 } // hasPEReferences():boolean
432
433 /**
434 * Sets the entity handler. When an entity starts and ends, the
435 * entity handler is notified of the change.
436 *
437 * @param entityHandler The new entity handler.
438 */
439 public void setEntityHandler(XMLEntityHandler entityHandler) {
440 fEntityHandler = entityHandler;
441 } // setEntityHandler(XMLEntityHandler)
442
443 // this simply returns the fResourceIdentifier object;
444 // this should only be used with caution by callers that
445 // carefully manage the entity manager's behaviour, so that
446 // this doesn't returning meaningless or misleading data.
447 // @return a reference to the current fResourceIdentifier object
448 public XMLResourceIdentifier getCurrentResourceIdentifier() {
449 return fResourceIdentifier;
450 }
451
452 // this simply returns the fCurrentEntity object;
453 // this should only be used with caution by callers that
454 // carefully manage the entity manager's behaviour, so that
455 // this doesn't returning meaningless or misleading data.
456 // @return a reference to the current fCurrentEntity object
457 public ScannedEntity getCurrentEntity() {
458 return fCurrentEntity;
459 }
460
461 /**
462 * Adds an internal entity declaration.
463 * <p>
464 * <strong>Note:</strong> This method ignores subsequent entity
465 * declarations.
466 * <p>
467 * <strong>Note:</strong> The name should be a unique symbol. The
468 * SymbolTable can be used for this purpose.
469 *
470 * @param name The name of the entity.
471 * @param text The text of the entity.
472 *
473 * @see SymbolTable
474 */
475 public void addInternalEntity(String name, String text) {
476 if (!fEntities.containsKey(name)) {
477 Entity entity = new InternalEntity(name, text, fInExternalSubset);
478 fEntities.put(name, entity);
479 }
480 else{
481 if(fWarnDuplicateEntityDef){
482 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
483 "MSG_DUPLICATE_ENTITY_DEFINITION",
484 new Object[]{ name },
485 XMLErrorReporter.SEVERITY_WARNING );
486 }
487 }
488
489 } // addInternalEntity(String,String)
490
491 /**
492 * Adds an external entity declaration.
493 * <p>
494 * <strong>Note:</strong> This method ignores subsequent entity
495 * declarations.
496 * <p>
497 * <strong>Note:</strong> The name should be a unique symbol. The
498 * SymbolTable can be used for this purpose.
499 *
500 * @param name The name of the entity.
501 * @param publicId The public identifier of the entity.
502 * @param literalSystemId The system identifier of the entity.
503 * @param baseSystemId The base system identifier of the entity.
504 * This is the system identifier of the entity
505 * where <em>the entity being added</em> and
506 * is used to expand the system identifier when
507 * the system identifier is a relative URI.
508 * When null the system identifier of the first
509 * external entity on the stack is used instead.
510 *
511 * @see SymbolTable
512 */
513 public void addExternalEntity(String name,
514 String publicId, String literalSystemId,
515 String baseSystemId) throws IOException {
516 if (!fEntities.containsKey(name)) {
517 if (baseSystemId == null) {
518 // search for the first external entity on the stack
519 int size = fEntityStack.size();
520 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) {
521 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId();
522 }
523 for (int i = size - 1; i >= 0 ; i--) {
524 ScannedEntity externalEntity =
525 (ScannedEntity)fEntityStack.elementAt(i);
526 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) {
527 baseSystemId = externalEntity.entityLocation.getExpandedSystemId();
528 break;
529 }
530 }
531 }
532 Entity entity = new ExternalEntity(name,
533 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId,
534 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset);
535 fEntities.put(name, entity);
536 }
537 else{
538 if(fWarnDuplicateEntityDef){
539 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
540 "MSG_DUPLICATE_ENTITY_DEFINITION",
541 new Object[]{ name },
542 XMLErrorReporter.SEVERITY_WARNING );
543 }
544 }
545
546 } // addExternalEntity(String,String,String,String)
547
548 /**
549 * Checks whether an entity given by name is external.
550 *
551 * @param entityName The name of the entity to check.
552 * @return True if the entity is external, false otherwise
553 * (including when the entity is not declared).
554 */
555 public boolean isExternalEntity(String entityName) {
556
557 Entity entity = (Entity)fEntities.get(entityName);
558 if (entity == null) {
559 return false;
560 }
561 return entity.isExternal();
562 }
563
564 /**
565 * Checks whether the declaration of an entity given by name is
566 // in the external subset.
567 *
568 * @param entityName The name of the entity to check.
569 * @return True if the entity was declared in the external subset, false otherwise
570 * (including when the entity is not declared).
571 */
572 public boolean isEntityDeclInExternalSubset(String entityName) {
573
574 Entity entity = (Entity)fEntities.get(entityName);
575 if (entity == null) {
576 return false;
577 }
578 return entity.isEntityDeclInExternalSubset();
579 }
580
581 /**
582 * Adds an unparsed entity declaration.
583 * <p>
584 * <strong>Note:</strong> This method ignores subsequent entity
585 * declarations.
586 * <p>
587 * <strong>Note:</strong> The name should be a unique symbol. The
588 * SymbolTable can be used for this purpose.
589 *
590 * @param name The name of the entity.
591 * @param publicId The public identifier of the entity.
592 * @param systemId The system identifier of the entity.
593 * @param notation The name of the notation.
594 *
595 * @see SymbolTable
596 */
597 public void addUnparsedEntity(String name,
598 String publicId, String systemId,
599 String baseSystemId, String notation) {
600 if (!fEntities.containsKey(name)) {
601 Entity entity = new ExternalEntity(name,
602 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null),
603 notation, fInExternalSubset);
604 fEntities.put(name, entity);
605 }
606 else{
607 if(fWarnDuplicateEntityDef){
608 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
609 "MSG_DUPLICATE_ENTITY_DEFINITION",
610 new Object[]{ name },
611 XMLErrorReporter.SEVERITY_WARNING );
612 }
613 }
614 } // addUnparsedEntity(String,String,String,String)
615
616 /**
617 * Checks whether an entity given by name is unparsed.
618 *
619 * @param entityName The name of the entity to check.
620 * @return True if the entity is unparsed, false otherwise
621 * (including when the entity is not declared).
622 */
623 public boolean isUnparsedEntity(String entityName) {
624
625 Entity entity = (Entity)fEntities.get(entityName);
626 if (entity == null) {
627 return false;
628 }
629 return entity.isUnparsed();
630 }
631
632 /**
633 * Checks whether an entity given by name is declared.
634 *
635 * @param entityName The name of the entity to check.
636 * @return True if the entity is declared, false otherwise.
637 */
638 public boolean isDeclaredEntity(String entityName) {
639
640 Entity entity = (Entity)fEntities.get(entityName);
641 return entity != null;
642 }
643
644 /**
645 * Resolves the specified public and system identifiers. This
646 * method first attempts to resolve the entity based on the
647 * EntityResolver registered by the application. If no entity
648 * resolver is registered or if the registered entity handler
649 * is unable to resolve the entity, then default entity
650 * resolution will occur.
651 *
652 * @param resourceIdentifier The XMLResourceIdentifier for the resource to resolve.
653 *
654 * @return Returns an input source that wraps the resolved entity.
655 * This method will never return null.
656 *
657 * @throws IOException Thrown on i/o error.
658 * @throws XNIException Thrown by entity resolver to signal an error.
659 */
660 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier)
661 throws IOException, XNIException {
662 if(resourceIdentifier == null ) return null;
663 String publicId = resourceIdentifier.getPublicId();
664 String literalSystemId = resourceIdentifier.getLiteralSystemId();
665 String baseSystemId = resourceIdentifier.getBaseSystemId();
666 String expandedSystemId = resourceIdentifier.getExpandedSystemId();
667 // if no base systemId given, assume that it's relative
668 // to the systemId of the current scanned entity
669 // Sometimes the system id is not (properly) expanded.
670 // We need to expand the system id if:
671 // a. the expanded one was null; or
672 // b. the base system id was null, but becomes non-null from the current entity.
673 boolean needExpand = (expandedSystemId == null);
674 // REVISIT: why would the baseSystemId ever be null? if we
675 // didn't have to make this check we wouldn't have to reuse the
676 // fXMLResourceIdentifier object...
677 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) {
678 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId();
679 if (baseSystemId != null)
680 needExpand = true;
681 }
682 if (needExpand)
683 expandedSystemId = expandSystemId(literalSystemId, baseSystemId, false);
684
685 // give the entity resolver a chance
686 XMLInputSource xmlInputSource = null;
687 if (fEntityResolver != null) {
688 resourceIdentifier.setBaseSystemId(baseSystemId);
689 resourceIdentifier.setExpandedSystemId(expandedSystemId);
690 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier);
691 }
692
693 // do default resolution
694 // REVISIT: what's the correct behavior if the user provided an entity
695 // resolver (fEntityResolver != null), but resolveEntity doesn't return
696 // an input source (xmlInputSource == null)?
697 // do we do default resolution, or do we just return null? -SG
698 if (xmlInputSource == null) {
699 // REVISIT: when systemId is null, I think we should return null.
700 // is this the right solution? -SG
701 //if (systemId != null)
702 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId);
703 }
704
705 if (DEBUG_RESOLVER) {
706 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")");
707 System.err.println(" = " + xmlInputSource);
708 }
709
710 return xmlInputSource;
711
712 } // resolveEntity(XMLResourceIdentifier):XMLInputSource
713
714 /**
715 * Starts a named entity.
716 *
717 * @param entityName The name of the entity to start.
718 * @param literal True if this entity is started within a literal
719 * value.
720 *
721 * @throws IOException Thrown on i/o error.
722 * @throws XNIException Thrown by entity handler to signal an error.
723 */
724 public void startEntity(String entityName, boolean literal)
725 throws IOException, XNIException {
726
727 // was entity declared?
728 Entity entity = (Entity)fEntities.get(entityName);
729 if (entity == null) {
730 if (fEntityHandler != null) {
731 String encoding = null;
732 fResourceIdentifier.clear();
733 fEntityAugs.removeAllItems();
734 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
735 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
736 fEntityAugs.removeAllItems();
737 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
738 fEntityHandler.endEntity(entityName, fEntityAugs);
739 }
740 return;
741 }
742
743 // should we skip external entities?
744 boolean external = entity.isExternal();
745 if (external && (fValidationManager == null || !fValidationManager.isCachedDTD())) {
746 boolean unparsed = entity.isUnparsed();
747 boolean parameter = entityName.startsWith("%");
748 boolean general = !parameter;
749 if (unparsed || (general && !fExternalGeneralEntities) ||
750 (parameter && !fExternalParameterEntities)) {
751 if (fEntityHandler != null) {
752 fResourceIdentifier.clear();
753 final String encoding = null;
754 ExternalEntity externalEntity = (ExternalEntity)entity;
755 //REVISIT: since we're storing expandedSystemId in the
756 // externalEntity, how could this have got here if it wasn't already
757 // expanded??? - neilg
758 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null);
759 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null);
760 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId, false);
761 fResourceIdentifier.setValues(
762 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null),
763 extLitSysId, extBaseSysId, expandedSystemId);
764 fEntityAugs.removeAllItems();
765 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
766 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
767 fEntityAugs.removeAllItems();
768 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
769 fEntityHandler.endEntity(entityName, fEntityAugs);
770 }
771 return;
772 }
773 }
774
775 // is entity recursive?
776 int size = fEntityStack.size();
777 for (int i = size; i >= 0; i--) {
778 Entity activeEntity = i == size
779 ? fCurrentEntity
780 : (Entity)fEntityStack.elementAt(i);
781 if (activeEntity.name == entityName) {
782 StringBuffer path = new StringBuffer(entityName);
783 for (int j = i + 1; j < size; j++) {
784 activeEntity = (Entity)fEntityStack.elementAt(j);
785 path.append(" -> ");
786 path.append(activeEntity.name);
787 }
788 path.append(" -> ");
789 path.append(fCurrentEntity.name);
790 path.append(" -> ");
791 path.append(entityName);
792 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
793 "RecursiveReference",
794 new Object[] { entityName, path.toString() },
795 XMLErrorReporter.SEVERITY_FATAL_ERROR);
796 if (fEntityHandler != null) {
797 fResourceIdentifier.clear();
798 final String encoding = null;
799 if (external) {
800 ExternalEntity externalEntity = (ExternalEntity)entity;
801 // REVISIT: for the same reason above...
802 String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getLiteralSystemId() : null);
803 String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null);
804 String expandedSystemId = expandSystemId(extLitSysId, extBaseSysId, false);
805 fResourceIdentifier.setValues(
806 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null),
807 extLitSysId, extBaseSysId, expandedSystemId);
808 }
809 fEntityAugs.removeAllItems();
810 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
811 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs);
812 fEntityAugs.removeAllItems();
813 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE);
814 fEntityHandler.endEntity(entityName, fEntityAugs);
815 }
816 return;
817 }
818 }
819
820 // resolve external entity
821 XMLInputSource xmlInputSource = null;
822 if (external) {
823 ExternalEntity externalEntity = (ExternalEntity)entity;
824 xmlInputSource = resolveEntity(externalEntity.entityLocation);
825 }
826
827 // wrap internal entity
828 else {
829 InternalEntity internalEntity = (InternalEntity)entity;
830 Reader reader = new StringReader(internalEntity.text);
831 xmlInputSource = new XMLInputSource(null, null, null, reader, null);
832 }
833
834 // start the entity
835 startEntity(entityName, xmlInputSource, literal, external);
836
837 } // startEntity(String,boolean)
838
839 /**
840 * Starts the document entity. The document entity has the "[xml]"
841 * pseudo-name.
842 *
843 * @param xmlInputSource The input source of the document entity.
844 *
845 * @throws IOException Thrown on i/o error.
846 * @throws XNIException Thrown by entity handler to signal an error.
847 */
848 public void startDocumentEntity(XMLInputSource xmlInputSource)
849 throws IOException, XNIException {
850 startEntity(XMLEntity, xmlInputSource, false, true);
851 } // startDocumentEntity(XMLInputSource)
852
853 /**
854 * Starts the DTD entity. The DTD entity has the "[dtd]"
855 * pseudo-name.
856 *
857 * @param xmlInputSource The input source of the DTD entity.
858 *
859 * @throws IOException Thrown on i/o error.
860 * @throws XNIException Thrown by entity handler to signal an error.
861 */
862 public void startDTDEntity(XMLInputSource xmlInputSource)
863 throws IOException, XNIException {
864 startEntity(DTDEntity, xmlInputSource, false, true);
865 } // startDTDEntity(XMLInputSource)
866
867 // indicate start of external subset so that
868 // location of entity decls can be tracked
869 public void startExternalSubset() {
870 fInExternalSubset = true;
871 }
872
873 public void endExternalSubset() {
874 fInExternalSubset = false;
875 }
876
877 /**
878 * Starts an entity.
879 * <p>
880 * This method can be used to insert an application defined XML
881 * entity stream into the parsing stream.
882 *
883 * @param name The name of the entity.
884 * @param xmlInputSource The input source of the entity.
885 * @param literal True if this entity is started within a
886 * literal value.
887 * @param isExternal whether this entity should be treated as an internal or external entity.
888 *
889 * @throws IOException Thrown on i/o error.
890 * @throws XNIException Thrown by entity handler to signal an error.
891 */
892 public void startEntity(String name,
893 XMLInputSource xmlInputSource,
894 boolean literal, boolean isExternal)
895 throws IOException, XNIException {
896
897 String encoding = setupCurrentEntity(name, xmlInputSource, literal, isExternal);
898
899 //when entity expansion limit is set by the Application, we need to
900 //check for the entity expansion limit set by the parser, if number of entity
901 //expansions exceeds the entity expansion limit, parser will throw fatal error.
902 // Note that this is intentionally unbalanced; it counts
903 // the number of expansions *per document*.
904 if( fSecurityManager != null && fEntityExpansionCount++ > fEntityExpansionLimit ){
905 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
906 "EntityExpansionLimitExceeded",
907 new Object[]{new Integer(fEntityExpansionLimit) },
908 XMLErrorReporter.SEVERITY_FATAL_ERROR );
909 // is there anything better to do than reset the counter?
910 // at least one can envision debugging applications where this might
911 // be useful...
912 fEntityExpansionCount = 0;
913 }
914
915 // call handler
916 if (fEntityHandler != null) {
917 fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null);
918 }
919
920 } // startEntity(String,XMLInputSource)
921
922 /**
923 * This method uses the passed-in XMLInputSource to make
924 * fCurrentEntity usable for reading.
925 * @param name name of the entity (XML is it's the document entity)
926 * @param xmlInputSource the input source, with sufficient information
927 * to begin scanning characters.
928 * @param literal True if this entity is started within a
929 * literal value.
930 * @param isExternal whether this entity should be treated as an internal or external entity.
931 * @throws IOException if anything can't be read
932 * XNIException If any parser-specific goes wrong.
933 * @return the encoding of the new entity or null if a character stream was employed
934 */
935 public String setupCurrentEntity(String name, XMLInputSource xmlInputSource,
936 boolean literal, boolean isExternal)
937 throws IOException, XNIException {
938 // get information
939
940 final String publicId = xmlInputSource.getPublicId();
941 String literalSystemId = xmlInputSource.getSystemId();
942 String baseSystemId = xmlInputSource.getBaseSystemId();
943 String encoding = xmlInputSource.getEncoding();
944 final boolean encodingExternallySpecified = (encoding != null);
945 Boolean isBigEndian = null;
946 fTempByteBuffer = null;
947
948 // create reader
949 InputStream stream = null;
950 Reader reader = xmlInputSource.getCharacterStream();
951 // First chance checking strict URI
952 String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI);
953 if (baseSystemId == null) {
954 baseSystemId = expandedSystemId;
955 }
956 if (reader == null) {
957 stream = xmlInputSource.getByteStream();
958 if (stream == null) {
959 URL location = new URL(expandedSystemId);
960 URLConnection connect = location.openConnection();
961 if (!(connect instanceof HttpURLConnection)) {
962 stream = connect.getInputStream();
963 }
964 else {
965 boolean followRedirects = true;
966
967 // setup URLConnection if we have an HTTPInputSource
968 if (xmlInputSource instanceof HTTPInputSource) {
969 final HttpURLConnection urlConnection = (HttpURLConnection) connect;
970 final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;
971
972 // set request properties
973 Iterator propIter = httpInputSource.getHTTPRequestProperties();
974 while (propIter.hasNext()) {
975 Map.Entry entry = (Map.Entry) propIter.next();
976 urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
977 }
978
979 // set preference for redirection
980 followRedirects = httpInputSource.getFollowHTTPRedirects();
981 if (!followRedirects) {
982 setInstanceFollowRedirects(urlConnection, followRedirects);
983 }
984 }
985
986 stream = connect.getInputStream();
987
988 // REVISIT: If the URLConnection has external encoding
989 // information, we should be reading it here. It's located
990 // in the charset parameter of Content-Type. -- mrglavas
991
992 if (followRedirects) {
993 String redirect = connect.getURL().toString();
994 // E43: Check if the URL was redirected, and then
995 // update literal and expanded system IDs if needed.
996 if (!redirect.equals(expandedSystemId)) {
997 literalSystemId = redirect;
998 expandedSystemId = redirect;
999 }
1000 }
1001 }
1002 }
1003 // wrap this stream in RewindableInputStream
1004 stream = new RewindableInputStream(stream);
1005
1006 // perform auto-detect of encoding if necessary
1007 if (encoding == null) {
1008 // read first four bytes and determine encoding
1009 final byte[] b4 = new byte[4];
1010 int count = 0;
1011 for (; count<4; count++ ) {
1012 b4[count] = (byte)stream.read();
1013 }
1014 if (count == 4) {
1015 Object [] encodingDesc = getEncodingName(b4, count);
1016 encoding = (String)(encodingDesc[0]);
1017 isBigEndian = (Boolean)(encodingDesc[1]);
1018
1019 stream.reset();
1020 // Special case UTF-8 files with BOM created by Microsoft
1021 // tools. It's more efficient to consume the BOM than make
1022 // the reader perform extra checks. -Ac
1023 if (count > 2 && encoding.equals("UTF-8")) {
1024 int b0 = b4[0] & 0xFF;
1025 int b1 = b4[1] & 0xFF;
1026 int b2 = b4[2] & 0xFF;
1027 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1028 // ignore first three bytes...
1029 stream.skip(3);
1030 }
1031 }
1032 reader = createReader(stream, encoding, isBigEndian);
1033 }
1034 else {
1035 reader = createReader(stream, encoding, isBigEndian);
1036 }
1037 }
1038
1039 // use specified encoding
1040 else {
1041 encoding = encoding.toUpperCase(Locale.ENGLISH);
1042
1043 // If encoding is UTF-8, consume BOM if one is present.
1044 if (encoding.equals("UTF-8")) {
1045 final int[] b3 = new int[3];
1046 int count = 0;
1047 for (; count < 3; ++count) {
1048 b3[count] = stream.read();
1049 if (b3[count] == -1)
1050 break;
1051 }
1052 if (count == 3) {
1053 if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
1054 // First three bytes are not BOM, so reset.
1055 stream.reset();
1056 }
1057 }
1058 else {
1059 stream.reset();
1060 }
1061 reader = createReader(stream, encoding, isBigEndian);
1062 }
1063 // If encoding is UTF-16, we still need to read the first four bytes
1064 // in order to discover the byte order.
1065 else if (encoding.equals("UTF-16")) {
1066 final int[] b4 = new int[4];
1067 int count = 0;
1068 for (; count < 4; ++count) {
1069 b4[count] = stream.read();
1070 if (b4[count] == -1)
1071 break;
1072 }
1073 stream.reset();
1074
1075 String utf16Encoding = "UTF-16";
1076 if (count >= 2) {
1077 final int b0 = b4[0];
1078 final int b1 = b4[1];
1079 if (b0 == 0xFE && b1 == 0xFF) {
1080 // UTF-16, big-endian
1081 utf16Encoding = "UTF-16BE";
1082 isBigEndian = Boolean.TRUE;
1083 }
1084 else if (b0 == 0xFF && b1 == 0xFE) {
1085 // UTF-16, little-endian
1086 utf16Encoding = "UTF-16LE";
1087 isBigEndian = Boolean.FALSE;
1088 }
1089 else if (count == 4) {
1090 final int b2 = b4[2];
1091 final int b3 = b4[3];
1092 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
1093 // UTF-16, big-endian, no BOM
1094 utf16Encoding = "UTF-16BE";
1095 isBigEndian = Boolean.TRUE;
1096 }
1097 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
1098 // UTF-16, little-endian, no BOM
1099 utf16Encoding = "UTF-16LE";
1100 isBigEndian = Boolean.FALSE;
1101 }
1102 }
1103 }
1104 reader = createReader(stream, utf16Encoding, isBigEndian);
1105 }
1106 // If encoding is UCS-4, we still need to read the first four bytes
1107 // in order to discover the byte order.
1108 else if (encoding.equals("ISO-10646-UCS-4")) {
1109 final int[] b4 = new int[4];
1110 int count = 0;
1111 for (; count < 4; ++count) {
1112 b4[count] = stream.read();
1113 if (b4[count] == -1)
1114 break;
1115 }
1116 stream.reset();
1117
1118 // Ignore unusual octet order for now.
1119 if (count == 4) {
1120 // UCS-4, big endian (1234)
1121 if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
1122 isBigEndian = Boolean.TRUE;
1123 }
1124 // UCS-4, little endian (1234)
1125 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
1126 isBigEndian = Boolean.FALSE;
1127 }
1128 }
1129 reader = createReader(stream, encoding, isBigEndian);
1130 }
1131 // If encoding is UCS-2, we still need to read the first four bytes
1132 // in order to discover the byte order.
1133 else if (encoding.equals("ISO-10646-UCS-2")) {
1134 final int[] b4 = new int[4];
1135 int count = 0;
1136 for (; count < 4; ++count) {
1137 b4[count] = stream.read();
1138 if (b4[count] == -1)
1139 break;
1140 }
1141 stream.reset();
1142
1143 if (count == 4) {
1144 // UCS-2, big endian
1145 if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
1146 isBigEndian = Boolean.TRUE;
1147 }
1148 // UCS-2, little endian
1149 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
1150 isBigEndian = Boolean.FALSE;
1151 }
1152 }
1153 reader = createReader(stream, encoding, isBigEndian);
1154 }
1155 else {
1156 reader = createReader(stream, encoding, isBigEndian);
1157 }
1158 }
1159
1160 // read one character at a time so we don't jump too far
1161 // ahead, converting characters from the byte stream in
1162 // the wrong encoding
1163 if (DEBUG_ENCODINGS) {
1164 System.out.println("$$$ no longer wrapping reader in OneCharReader");
1165 }
1166 //reader = new OneCharReader(reader);
1167 }
1168
1169 // We've seen a new Reader.
1170 // Push it on the stack so we can close it later.
1171 fReaderStack.push(reader);
1172
1173 // push entity on stack
1174 if (fCurrentEntity != null) {
1175 fEntityStack.push(fCurrentEntity);
1176 }
1177
1178 // create entity
1179 fCurrentEntity = new ScannedEntity(name,
1180 new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),
1181 stream, reader, fTempByteBuffer, encoding, literal, false, isExternal);
1182 fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified);
1183 fEntityScanner.setCurrentEntity(fCurrentEntity);
1184 fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId);
1185 return encoding;
1186 } //setupCurrentEntity(String, XMLInputSource, boolean, boolean): String
1187
1188 // set version of scanner to use
1189 public void setScannerVersion(short version) {
1190 if(version == Constants.XML_VERSION_1_0) {
1191 if(fXML10EntityScanner == null) {
1192 fXML10EntityScanner = new XMLEntityScanner();
1193 }
1194 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1195 fEntityScanner = fXML10EntityScanner;
1196 fEntityScanner.setCurrentEntity(fCurrentEntity);
1197 } else {
1198 if(fXML11EntityScanner == null) {
1199 fXML11EntityScanner = new XML11EntityScanner();
1200 }
1201 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1202 fEntityScanner = fXML11EntityScanner;
1203 fEntityScanner.setCurrentEntity(fCurrentEntity);
1204 }
1205 } // setScannerVersion(short)
1206
1207 /** Returns the entity scanner. */
1208 public XMLEntityScanner getEntityScanner() {
1209 if(fEntityScanner == null) {
1210 // default to 1.0
1211 if(fXML10EntityScanner == null) {
1212 fXML10EntityScanner = new XMLEntityScanner();
1213 }
1214 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1215 fEntityScanner = fXML10EntityScanner;
1216 }
1217 return fEntityScanner;
1218 } // getEntityScanner():XMLEntityScanner
1219
1220 // A stack containing all the open readers
1221 protected Stack fReaderStack = new Stack();
1222
1223 /**
1224 * Close all opened InputStreams and Readers opened by this parser.
1225 */
1226 public void closeReaders() {
1227 // close all readers
1228 for (int i = fReaderStack.size()-1; i >= 0; i--) {
1229 try {
1230 ((Reader)fReaderStack.pop()).close();
1231 } catch (IOException e) {
1232 // ignore
1233 }
1234 }
1235 }
1236
1237 //
1238 // XMLComponent methods
1239 //
1240
1241 /**
1242 * Resets the component. The component can query the component manager
1243 * about any features and properties that affect the operation of the
1244 * component.
1245 *
1246 * @param componentManager The component manager.
1247 *
1248 * @throws SAXException Thrown by component on initialization error.
1249 * For example, if a feature or property is
1250 * required for the operation of the component, the
1251 * component manager may throw a
1252 * SAXNotRecognizedException or a
1253 * SAXNotSupportedException.
1254 */
1255 public void reset(XMLComponentManager componentManager)
1256 throws XMLConfigurationException {
1257
1258 boolean parser_settings;
1259 try {
1260 parser_settings = componentManager.getFeature(PARSER_SETTINGS);
1261 } catch (XMLConfigurationException e) {
1262 parser_settings = true;
1263 }
1264
1265 if (!parser_settings) {
1266 // parser settings have not been changed
1267 reset();
1268 return;
1269 }
1270
1271 // sax features
1272 try {
1273 fValidation = componentManager.getFeature(VALIDATION);
1274 }
1275 catch (XMLConfigurationException e) {
1276 fValidation = false;
1277 }
1278 try {
1279 fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES);
1280 }
1281 catch (XMLConfigurationException e) {
1282 fExternalGeneralEntities = true;
1283 }
1284 try {
1285 fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES);
1286 }
1287 catch (XMLConfigurationException e) {
1288 fExternalParameterEntities = true;
1289 }
1290
1291 // xerces features
1292 try {
1293 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS);
1294 }
1295 catch (XMLConfigurationException e) {
1296 fAllowJavaEncodings = false;
1297 }
1298
1299 try {
1300 fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF);
1301 }
1302 catch (XMLConfigurationException e) {
1303 fWarnDuplicateEntityDef = false;
1304 }
1305
1306 try {
1307 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT);
1308 }
1309 catch (XMLConfigurationException e) {
1310 fStrictURI = false;
1311 }
1312
1313 // xerces properties
1314 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
1315 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
1316 try {
1317 fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER);
1318 }
1319 catch (XMLConfigurationException e) {
1320 fEntityResolver = null;
1321 }
1322 try {
1323 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER);
1324 }
1325 catch (XMLConfigurationException e) {
1326 fValidationManager = null;
1327 }
1328 try {
1329 fSecurityManager = (SecurityManager)componentManager.getProperty(SECURITY_MANAGER);
1330 }
1331 catch (XMLConfigurationException e) {
1332 fSecurityManager = null;
1333 }
1334
1335 // reset general state
1336 reset();
1337
1338 } // reset(XMLComponentManager)
1339
1340 // reset general state. Should not be called other than by
1341 // a class acting as a component manager but not
1342 // implementing that interface for whatever reason.
1343 public void reset() {
1344 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0;
1345
1346 // initialize state
1347 fStandalone = false;
1348 fHasPEReferences = false;
1349 fEntities.clear();
1350 fEntityStack.removeAllElements();
1351 fEntityExpansionCount = 0;
1352
1353 fCurrentEntity = null;
1354 // reset scanner
1355 if(fXML10EntityScanner != null){
1356 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1357 }
1358 if(fXML11EntityScanner != null) {
1359 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter);
1360 }
1361
1362 // DEBUG
1363 if (DEBUG_ENTITIES) {
1364 addInternalEntity("text", "Hello, World.");
1365 addInternalEntity("empty-element", "<foo/>");
1366 addInternalEntity("balanced-element", "<foo></foo>");
1367 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>");
1368 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>");
1369 addInternalEntity("unbalanced-entity", "<foo>");
1370 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>");
1371 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>");
1372 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>");
1373 try {
1374 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml");
1375 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml");
1376 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml");
1377 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml");
1378 }
1379 catch (IOException ex) {
1380 // should never happen
1381 }
1382 }
1383
1384 // copy declared entities
1385 if (fDeclaredEntities != null) {
1386 java.util.Enumeration keys = fDeclaredEntities.keys();
1387 while (keys.hasMoreElements()) {
1388 Object key = keys.nextElement();
1389 Object value = fDeclaredEntities.get(key);
1390 fEntities.put(key, value);
1391 }
1392 }
1393 fEntityHandler = null;
1394
1395 } // reset(XMLComponentManager)
1396
1397 /**
1398 * Returns a list of feature identifiers that are recognized by
1399 * this component. This method may return null if no features
1400 * are recognized by this component.
1401 */
1402 public String[] getRecognizedFeatures() {
1403 return (String[])(RECOGNIZED_FEATURES.clone());
1404 } // getRecognizedFeatures():String[]
1405
1406 /**
1407 * Sets the state of a feature. This method is called by the component
1408 * manager any time after reset when a feature changes state.
1409 * <p>
1410 * <strong>Note:</strong> Components should silently ignore features
1411 * that do not affect the operation of the component.
1412 *
1413 * @param featureId The feature identifier.
1414 * @param state The state of the feature.
1415 *
1416 * @throws SAXNotRecognizedException The component should not throw
1417 * this exception.
1418 * @throws SAXNotSupportedException The component should not throw
1419 * this exception.
1420 */
1421 public void setFeature(String featureId, boolean state)
1422 throws XMLConfigurationException {
1423
1424 // xerces features
1425 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
1426 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
1427 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() &&
1428 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) {
1429 fAllowJavaEncodings = state;
1430 }
1431 }
1432
1433 } // setFeature(String,boolean)
1434
1435 /**
1436 * Returns a list of property identifiers that are recognized by
1437 * this component. This method may return null if no properties
1438 * are recognized by this component.
1439 */
1440 public String[] getRecognizedProperties() {
1441 return (String[])(RECOGNIZED_PROPERTIES.clone());
1442 } // getRecognizedProperties():String[]
1443
1444 /**
1445 * Sets the value of a property. This method is called by the component
1446 * manager any time after reset when a property changes value.
1447 * <p>
1448 * <strong>Note:</strong> Components should silently ignore properties
1449 * that do not affect the operation of the component.
1450 *
1451 * @param propertyId The property identifier.
1452 * @param value The value of the property.
1453 *
1454 * @throws SAXNotRecognizedException The component should not throw
1455 * this exception.
1456 * @throws SAXNotSupportedException The component should not throw
1457 * this exception.
1458 */
1459 public void setProperty(String propertyId, Object value)
1460 throws XMLConfigurationException {
1461
1462 // Xerces properties
1463 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
1464 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
1465
1466 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
1467 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
1468 fSymbolTable = (SymbolTable)value;
1469 return;
1470 }
1471 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
1472 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
1473 fErrorReporter = (XMLErrorReporter)value;
1474 return;
1475 }
1476 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
1477 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
1478 fEntityResolver = (XMLEntityResolver)value;
1479 return;
1480 }
1481 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() &&
1482 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) {
1483 Integer bufferSize = (Integer)value;
1484 if (bufferSize != null &&
1485 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
1486 fBufferSize = bufferSize.intValue();
1487 fEntityScanner.setBufferSize(fBufferSize);
1488 fByteBufferPool.setBufferSize(fBufferSize);
1489 fCharacterBufferPool.setExternalBufferSize(fBufferSize);
1490 }
1491 }
1492 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() &&
1493 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
1494 fSecurityManager = (SecurityManager)value;
1495 fEntityExpansionLimit = (fSecurityManager != null)?fSecurityManager.getEntityExpansionLimit():0;
1496 }
1497 }
1498
1499 } // setProperty(String,Object)
1500
1501 /**
1502 * Returns the default state for a feature, or null if this
1503 * component does not want to report a default value for this
1504 * feature.
1505 *
1506 * @param featureId The feature identifier.
1507 *
1508 * @since Xerces 2.2.0
1509 */
1510 public Boolean getFeatureDefault(String featureId) {
1511 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
1512 if (RECOGNIZED_FEATURES[i].equals(featureId)) {
1513 return FEATURE_DEFAULTS[i];
1514 }
1515 }
1516 return null;
1517 } // getFeatureDefault(String):Boolean
1518
1519 /**
1520 * Returns the default state for a property, or null if this
1521 * component does not want to report a default value for this
1522 * property.
1523 *
1524 * @param propertyId The property identifier.
1525 *
1526 * @since Xerces 2.2.0
1527 */
1528 public Object getPropertyDefault(String propertyId) {
1529 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
1530 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
1531 return PROPERTY_DEFAULTS[i];
1532 }
1533 }
1534 return null;
1535 } // getPropertyDefault(String):Object
1536
1537 //
1538 // Public static methods
1539 //
1540
// current value of the "user.dir" property
private static String gUserDir;
// cached URI object for the current value of the escaped "user.dir" property stored as a URI
private static URI gUserDirURI;
// which ASCII characters need to be escaped
private static final boolean[] gNeedEscaping = new boolean[128];
// the first hex character if a character needs to be escaped
private static final char[] gAfterEscaping1 = new char[128];
// the second hex character if a character needs to be escaped
private static final char[] gAfterEscaping2 = new char[128];
private static final char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                       '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
// initialize the above 3 arrays
static {
    // printable ASCII characters that must be escaped
    final String reserved = " <>#%\"{}|\\^~[]`";
    for (int code = 0; code < 128; code++) {
        // control characters 0x00-0x1F and DEL (0x7F) are always escaped
        final boolean control = (code <= 0x1f) || (code == 0x7f);
        if (control || reserved.indexOf(code) >= 0) {
            gNeedEscaping[code] = true;
            gAfterEscaping1[code] = gHexChs[code >> 4];
            gAfterEscaping2[code] = gHexChs[code & 0xf];
        }
    }
}
1574
1575 private static PrivilegedAction GET_USER_DIR_SYSTEM_PROPERTY = new PrivilegedAction() {
1576 public Object run() {
1577 return System.getProperty("user.dir");
1578 }
1579 };
1580
1581 // To escape the "user.dir" system property, by using %HH to represent
1582 // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%'
1583 // and '"'. It's a static method, so needs to be synchronized.
1584 // this method looks heavy, but since the system property isn't expected
1585 // to change often, so in most cases, we only need to return the URI
1586 // that was escaped before.
1587 // According to the URI spec, non-ASCII characters (whose value >= 128)
1588 // need to be escaped too.
1589 // REVISIT: don't know how to escape non-ASCII characters, especially
1590 // which encoding to use. Leave them for now.
1591 private static synchronized URI getUserDir() throws URI.MalformedURIException {
1592 // get the user.dir property
1593 String userDir = "";
1594 try {
1595 userDir = (String) AccessController.doPrivileged(GET_USER_DIR_SYSTEM_PROPERTY);
1596 }
1597 catch (SecurityException se) {}
1598
1599 // return empty string if property value is empty string.
1600 if (userDir.length() == 0)
1601 return new URI("file", "", "", null, null);
1602
1603 // compute the new escaped value if the new property value doesn't
1604 // match the previous one
1605 if (gUserDirURI != null && userDir.equals(gUserDir)) {
1606 return gUserDirURI;
1607 }
1608
1609 // record the new value as the global property value
1610 gUserDir = userDir;
1611
1612 char separator = java.io.File.separatorChar;
1613 userDir = userDir.replace(separator, '/');
1614
1615 int len = userDir.length(), ch;
1616 StringBuffer buffer = new StringBuffer(len*3);
1617 // change C:/blah to /C:/blah
1618 if (len >= 2 && userDir.charAt(1) == ':') {
1619 ch = Character.toUpperCase(userDir.charAt(0));
1620 if (ch >= 'A' && ch <= 'Z') {
1621 buffer.append('/');
1622 }
1623 }
1624
1625 // for each character in the path
1626 int i = 0;
1627 for (; i < len; i++) {
1628 ch = userDir.charAt(i);
1629 // if it's not an ASCII character, break here, and use UTF-8 encoding
1630 if (ch >= 128)
1631 break;
1632 if (gNeedEscaping[ch]) {
1633 buffer.append('%');
1634 buffer.append(gAfterEscaping1[ch]);
1635 buffer.append(gAfterEscaping2[ch]);
1636 // record the fact that it's escaped
1637 }
1638 else {
1639 buffer.append((char)ch);
1640 }
1641 }
1642
1643 // we saw some non-ascii character
1644 if (i < len) {
1645 // get UTF-8 bytes for the remaining sub-string
1646 byte[] bytes = null;
1647 byte b;
1648 try {
1649 bytes = userDir.substring(i).getBytes("UTF-8");
1650 } catch (java.io.UnsupportedEncodingException e) {
1651 // should never happen
1652 return new URI("file", "", userDir, null, null);
1653 }
1654 len = bytes.length;
1655
1656 // for each byte
1657 for (i = 0; i < len; i++) {
1658 b = bytes[i];
1659 // for non-ascii character: make it positive, then escape
1660 if (b < 0) {
1661 ch = b + 256;
1662 buffer.append('%');
1663 buffer.append(gHexChs[ch >> 4]);
1664 buffer.append(gHexChs[ch & 0xf]);
1665 }
1666 else if (gNeedEscaping[b]) {
1667 buffer.append('%');
1668 buffer.append(gAfterEscaping1[b]);
1669 buffer.append(gAfterEscaping2[b]);
1670 }
1671 else {
1672 buffer.append((char)b);
1673 }
1674 }
1675 }
1676
1677 // change blah/blah to blah/blah/
1678 if (!userDir.endsWith("/"))
1679 buffer.append('/');
1680
1681 gUserDirURI = new URI("file", "", buffer.toString(), null, null);
1682
1683 return gUserDirURI;
1684 }
1685
1686 /**
1687 * Absolutizes a URI using the current value
1688 * of the "user.dir" property as the base URI. If
1689 * the URI is already absolute, this is a no-op.
1690 *
1691 * @param uri the URI to absolutize
1692 */
1693 public static void absolutizeAgainstUserDir(URI uri)
1694 throws URI.MalformedURIException {
1695 uri.absolutize(getUserDir());
1696 }
1697
1698 /**
1699 * Expands a system id and returns the system id as a URI, if
1700 * it can be expanded. A return value of null means that the
1701 * identifier is already expanded. An exception thrown
1702 * indicates a failure to expand the id.
1703 *
1704 * @param systemId The systemId to be expanded.
1705 *
1706 * @return Returns the URI string representing the expanded system
1707 * identifier. A null value indicates that the given
1708 * system identifier is already expanded.
1709 *
1710 */
1711 public static String expandSystemId(String systemId, String baseSystemId,
1712 boolean strict)
1713 throws URI.MalformedURIException {
1714
1715 // check if there is a system id before
1716 // trying to expand it.
1717 if (systemId == null) {
1718 return null;
1719 }
1720
1721 // system id has to be a valid URI
1722 if (strict) {
1723 return expandSystemIdStrictOn(systemId, baseSystemId);
1724 }
1725
1726 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up.
1727 try {
1728 return expandSystemIdStrictOff(systemId, baseSystemId);
1729 }
1730 catch (URI.MalformedURIException e) {
1731 // continue on...
1732 }
1733
1734 // check for bad parameters id
1735 if (systemId.length() == 0) {
1736 return systemId;
1737 }
1738
1739 // normalize id
1740 String id = fixURI(systemId);
1741
1742 // normalize base
1743 URI base = null;
1744 URI uri = null;
1745 try {
1746 if (baseSystemId == null || baseSystemId.length() == 0 ||
1747 baseSystemId.equals(systemId)) {
1748 base = getUserDir();
1749 }
1750 else {
1751 try {
1752 base = new URI(fixURI(baseSystemId).trim());
1753 }
1754 catch (URI.MalformedURIException e) {
1755 if (baseSystemId.indexOf(':') != -1) {
1756 // for xml schemas we might have baseURI with
1757 // a specified drive
1758 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null);
1759 }
1760 else {
1761 base = new URI(getUserDir(), fixURI(baseSystemId));
1762 }
1763 }
1764 }
1765 // expand id
1766 uri = new URI(base, id.trim());
1767 }
1768 catch (Exception e) {
1769 // let it go through
1770
1771 }
1772
1773 if (uri == null) {
1774 return systemId;
1775 }
1776 return uri.toString();
1777
1778 } // expandSystemId(String,String,boolean):String
1779
1780 /**
1781 * Helper method for expandSystemId(String,String,boolean):String
1782 */
1783 private static String expandSystemIdStrictOn(String systemId, String baseSystemId)
1784 throws URI.MalformedURIException {
1785
1786 URI systemURI = new URI(systemId, true);
1787 // If it's already an absolute one, return it
1788 if (systemURI.isAbsoluteURI()) {
1789 return systemId;
1790 }
1791
1792 // If there isn't a base URI, use the working directory
1793 URI baseURI = null;
1794 if (baseSystemId == null || baseSystemId.length() == 0) {
1795 baseURI = getUserDir();
1796 }
1797 else {
1798 baseURI = new URI(baseSystemId, true);
1799 if (!baseURI.isAbsoluteURI()) {
1800 // assume "base" is also a relative uri
1801 baseURI.absolutize(getUserDir());
1802 }
1803 }
1804
1805 // absolutize the system identifier using the base URI
1806 systemURI.absolutize(baseURI);
1807
1808 // return the string rep of the new uri (an absolute one)
1809 return systemURI.toString();
1810
1811 // if any exception is thrown, it'll get thrown to the caller.
1812
1813 } // expandSystemIdStrictOn(String,String):String
1814
1815 /**
1816 * Helper method for expandSystemId(String,String,boolean):String
1817 */
1818 private static String expandSystemIdStrictOff(String systemId, String baseSystemId)
1819 throws URI.MalformedURIException {
1820
1821 URI systemURI = new URI(systemId, true);
1822 // If it's already an absolute one, return it
1823 if (systemURI.isAbsoluteURI()) {
1824 if (systemURI.getScheme().length() > 1) {
1825 return systemId;
1826 }
1827 /**
1828 * If the scheme's length is only one character,
1829 * it's likely that this was intended as a file
1830 * path. Fixing this up in expandSystemId to
1831 * maintain backwards compatibility.
1832 */
1833 throw new URI.MalformedURIException();
1834 }
1835
1836 // If there isn't a base URI, use the working directory
1837 URI baseURI = null;
1838 if (baseSystemId == null || baseSystemId.length() == 0) {
1839 baseURI = getUserDir();
1840 }
1841 else {
1842 baseURI = new URI(baseSystemId, true);
1843 if (!baseURI.isAbsoluteURI()) {
1844 // assume "base" is also a relative uri
1845 baseURI.absolutize(getUserDir());
1846 }
1847 }
1848
1849 // absolutize the system identifier using the base URI
1850 systemURI.absolutize(baseURI);
1851
1852 // return the string rep of the new uri (an absolute one)
1853 return systemURI.toString();
1854
1855 // if any exception is thrown, it'll get thrown to the caller.
1856
1857 } // expandSystemIdStrictOff(String,String):String
1858
1859 /**
1860 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>.
1861 * This may fail on earlier JDKs which do not support setting this preference.
1862 */
1863 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) {
1864 try {
1865 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE});
1866 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE});
1867 }
1868 // setInstanceFollowRedirects doesn't exist.
1869 catch (Exception exc) {}
1870 }
1871
1872 public static OutputStream createOutputStream(String uri) throws IOException {
1873 // URI was specified. Handle relative URIs.
1874 String expanded = XMLEntityManager.expandSystemId(uri, null, true);
1875 URL url = new URL(expanded != null ? expanded : uri);
1876 OutputStream out = null;
1877 String protocol = url.getProtocol();
1878 String host = url.getHost();
1879 // Use FileOutputStream if this URI is for a local file.
1880 if (protocol.equals("file")
1881 && (host == null || host.length() == 0 || host.equals("localhost"))) {
1882 out = new FileOutputStream(getPathWithoutEscapes(url.getPath()));
1883 }
1884 // Try to write to some other kind of URI. Some protocols
1885 // won't support this, though HTTP should work.
1886 else {
1887 URLConnection urlCon = url.openConnection();
1888 urlCon.setDoInput(false);
1889 urlCon.setDoOutput(true);
1890 urlCon.setUseCaches(false); // Enable tunneling.
1891 if (urlCon instanceof HttpURLConnection) {
1892 // The DOM L3 REC says if we are writing to an HTTP URI
1893 // it is to be done with an HTTP PUT.
1894 HttpURLConnection httpCon = (HttpURLConnection) urlCon;
1895 httpCon.setRequestMethod("PUT");
1896 }
1897 out = urlCon.getOutputStream();
1898 }
1899 return out;
1900 }
1901
1902 private static String getPathWithoutEscapes(String origPath) {
1903 if (origPath != null && origPath.length() != 0 && origPath.indexOf('%') != -1) {
1904 // Locate the escape characters
1905 StringTokenizer tokenizer = new StringTokenizer(origPath, "%");
1906 StringBuffer result = new StringBuffer(origPath.length());
1907 int size = tokenizer.countTokens();
1908 result.append(tokenizer.nextToken());
1909 for(int i = 1; i < size; ++i) {
1910 String token = tokenizer.nextToken();
1911 // Decode the 2 digit hexadecimal number following % in '%nn'
1912 result.append((char)Integer.valueOf(token.substring(0, 2), 16).intValue());
1913 result.append(token.substring(2));
1914 }
1915 return result.toString();
1916 }
1917 return origPath;
1918 }
1919
1920 //
1921 // Protected methods
1922 //
1923
1924 /**
1925 * Ends an entity.
1926 *
1927 * @throws XNIException Thrown by entity handler to signal an error.
1928 */
1929 void endEntity() throws XNIException {
1930
1931 // call handler
1932 if (DEBUG_BUFFER) {
1933 System.out.print("(endEntity: ");
1934 print(fCurrentEntity);
1935 System.out.println();
1936 }
1937 if (fEntityHandler != null) {
1938 fEntityHandler.endEntity(fCurrentEntity.name, null);
1939 }
1940
1941 // Close the reader for the current entity once we're
1942 // done with it, and remove it from our stack. If parsing
1943 // is halted at some point, the rest of the readers on
1944 // the stack will be closed during cleanup.
1945 try {
1946 fCurrentEntity.reader.close();
1947 }
1948 catch (IOException e) {
1949 // ignore
1950 }
1951 // REVISIT: We should never encounter underflow if the calls
1952 // to startEntity and endEntity are balanced, but guard
1953 // against the EmptyStackException for now. -- mrglavas
1954 if(!fReaderStack.isEmpty()) {
1955 fReaderStack.pop();
1956 }
1957
1958 // Release the character buffer back to the pool for reuse
1959 fCharacterBufferPool.returnBuffer(fCurrentEntity.fCharacterBuffer);
1960
1961 // Release the byte buffer back to the pool for reuse
1962 if (fCurrentEntity.fByteBuffer != null) {
1963 fByteBufferPool.returnBuffer(fCurrentEntity.fByteBuffer);
1964 }
1965
1966 // Pop entity stack.
1967 fCurrentEntity = fEntityStack.size() > 0
1968 ? (ScannedEntity)fEntityStack.pop() : null;
1969 fEntityScanner.setCurrentEntity(fCurrentEntity);
1970 if (DEBUG_BUFFER) {
1971 System.out.print(")endEntity: ");
1972 print(fCurrentEntity);
1973 System.out.println();
1974 }
1975
1976 } // endEntity()
1977
1978 /**
1979 * Returns the IANA encoding name that is auto-detected from
1980 * the bytes specified, with the endian-ness of that encoding where appropriate.
1981 *
1982 * @param b4 The first four bytes of the input.
1983 * @param count The number of bytes actually read.
1984 * @return a 2-element array: the first element, an IANA-encoding string,
1985 * the second element a Boolean which is true iff the document is big endian, false
1986 * if it's little-endian, and null if the distinction isn't relevant.
1987 */
1988 protected Object[] getEncodingName(byte[] b4, int count) {
1989
1990 if (count < 2) {
1991 return new Object[]{"UTF-8", null};
1992 }
1993
1994 // UTF-16, with BOM
1995 int b0 = b4[0] & 0xFF;
1996 int b1 = b4[1] & 0xFF;
1997 if (b0 == 0xFE && b1 == 0xFF) {
1998 // UTF-16, big-endian
1999 return new Object [] {"UTF-16BE", Boolean.TRUE};
2000 }
2001 if (b0 == 0xFF && b1 == 0xFE) {
2002 // UTF-16, little-endian
2003 return new Object [] {"UTF-16LE", Boolean.FALSE};
2004 }
2005
2006 // default to UTF-8 if we don't have enough bytes to make a
2007 // good determination of the encoding
2008 if (count < 3) {
2009 return new Object [] {"UTF-8", null};
2010 }
2011
2012 // UTF-8 with a BOM
2013 int b2 = b4[2] & 0xFF;
2014 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
2015 return new Object [] {"UTF-8", null};
2016 }
2017
2018 // default to UTF-8 if we don't have enough bytes to make a
2019 // good determination of the encoding
2020 if (count < 4) {
2021 return new Object [] {"UTF-8", null};
2022 }
2023
2024 // other encodings
2025 int b3 = b4[3] & 0xFF;
2026 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2027 // UCS-4, big endian (1234)
2028 return new Object [] {"ISO-10646-UCS-4", Boolean.TRUE};
2029 }
2030 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2031 // UCS-4, little endian (4321)
2032 return new Object [] {"ISO-10646-UCS-4", Boolean.FALSE};
2033 }
2034 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2035 // UCS-4, unusual octet order (2143)
2036 // REVISIT: What should this be?
2037 return new Object [] {"ISO-10646-UCS-4", null};
2038 }
2039 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2040 // UCS-4, unusual octect order (3412)
2041 // REVISIT: What should this be?
2042 return new Object [] {"ISO-10646-UCS-4", null};
2043 }
2044 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2045 // UTF-16, big-endian, no BOM
2046 // (or could turn out to be UCS-2...
2047 // REVISIT: What should this be?
2048 return new Object [] {"UTF-16BE", Boolean.TRUE};
2049 }
2050 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2051 // UTF-16, little-endian, no BOM
2052 // (or could turn out to be UCS-2...
2053 return new Object [] {"UTF-16LE", Boolean.FALSE};
2054 }
2055 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2056 // EBCDIC
2057 // a la xerces1, return CP037 instead of EBCDIC here
2058 return new Object [] {"CP037", null};
2059 }
2060
2061 // default encoding
2062 return new Object [] {"UTF-8", null};
2063
2064 } // getEncodingName(byte[],int):Object[]
2065
2066 /**
2067 * Creates a reader capable of reading the given input stream in
2068 * the specified encoding.
2069 *
2070 * @param inputStream The input stream.
2071 * @param encoding The encoding name that the input stream is
2072 * encoded using. If the user has specified that
2073 * Java encoding names are allowed, then the
2074 * encoding name may be a Java encoding name;
2075 * otherwise, it is an ianaEncoding name.
2076 * @param isBigEndian For encodings (like uCS-4), whose names cannot
2077 * specify a byte order, this tells whether the order is bigEndian. null menas
2078 * unknown or not relevant.
2079 *
2080 * @return Returns a reader.
2081 */
2082 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
2083 throws IOException {
2084
2085 // if the encoding is UTF-8 use the optimized UTF-8 reader
2086 if (encoding == "UTF-8" || encoding == null) {
2087 if (DEBUG_ENCODINGS) {
2088 System.out.println("$$$ creating UTF8Reader");
2089 }
2090 if (fTempByteBuffer == null) {
2091 fTempByteBuffer = fByteBufferPool.getBuffer();
2092 }
2093 return new UTF8Reader(inputStream, fTempByteBuffer, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
2094 }
2095
2096 // try to use an optimized reader
2097 String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
2098 if (ENCODING.equals("UTF-8")) {
2099 if (DEBUG_ENCODINGS) {
2100 System.out.println("$$$ creating UTF8Reader");
2101 }
2102 if (fTempByteBuffer == null) {
2103 fTempByteBuffer = fByteBufferPool.getBuffer();
2104 }
2105 return new UTF8Reader(inputStream, fTempByteBuffer, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
2106 }
2107 if(ENCODING.equals("ISO-10646-UCS-4")) {
2108 if(isBigEndian != null) {
2109 boolean isBE = isBigEndian.booleanValue();
2110 if(isBE) {
2111 return new UCSReader(inputStream, UCSReader.UCS4BE);
2112 } else {
2113 return new UCSReader(inputStream, UCSReader.UCS4LE);
2114 }
2115 } else {
2116 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2117 "EncodingByteOrderUnsupported",
2118 new Object[] { encoding },
2119 XMLErrorReporter.SEVERITY_FATAL_ERROR);
2120 }
2121 }
2122 if(ENCODING.equals("ISO-10646-UCS-2")) {
2123 if(isBigEndian != null) { // sould never happen with this encoding...
2124 boolean isBE = isBigEndian.booleanValue();
2125 if(isBE) {
2126 return new UCSReader(inputStream, UCSReader.UCS2BE);
2127 } else {
2128 return new UCSReader(inputStream, UCSReader.UCS2LE);
2129 }
2130 } else {
2131 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2132 "EncodingByteOrderUnsupported",
2133 new Object[] { encoding },
2134 XMLErrorReporter.SEVERITY_FATAL_ERROR);
2135 }
2136 }
2137
2138 // check for valid name
2139 boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
2140 boolean validJava = XMLChar.isValidJavaEncoding(encoding);
2141 if (!validIANA || (fAllowJavaEncodings && !validJava)) {
2142 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2143 "EncodingDeclInvalid",
2144 new Object[] { encoding },
2145 XMLErrorReporter.SEVERITY_FATAL_ERROR);
2146 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
2147 // because every byte is a valid ISO Latin 1 character.
2148 // It may not translate correctly but if we failed on
2149 // the encoding anyway, then we're expecting the content
2150 // of the document to be bad. This will just prevent an
2151 // invalid UTF-8 sequence to be detected. This is only
2152 // important when continue-after-fatal-error is turned
2153 // on. -Ac
2154 if (DEBUG_ENCODINGS) {
2155 System.out.println("$$$ creating Latin1Reader");
2156 }
2157 return new Latin1Reader(inputStream, fBufferSize);
2158 }
2159
2160 // try to use a Java reader
2161 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
2162 if (javaEncoding == null) {
2163 if (fAllowJavaEncodings) {
2164 javaEncoding = encoding;
2165 }
2166 else {
2167 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2168 "EncodingDeclInvalid",
2169 new Object[] { encoding },
2170 XMLErrorReporter.SEVERITY_FATAL_ERROR);
2171 // see comment above.
2172 if (DEBUG_ENCODINGS) {
2173 System.out.println("$$$ creating Latin1Reader");
2174 }
2175 if (fTempByteBuffer == null) {
2176 fTempByteBuffer = fByteBufferPool.getBuffer();
2177 }
2178 return new Latin1Reader(inputStream, fTempByteBuffer);
2179 }
2180 }
2181 else if (javaEncoding.equals("ASCII")) {
2182 if (DEBUG_ENCODINGS) {
2183 System.out.println("$$$ creating ASCIIReader");
2184 }
2185 if (fTempByteBuffer == null) {
2186 fTempByteBuffer = fByteBufferPool.getBuffer();
2187 }
2188 return new ASCIIReader(inputStream, fTempByteBuffer, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
2189 }
2190 else if (javaEncoding.equals("ISO8859_1")) {
2191 if (DEBUG_ENCODINGS) {
2192 System.out.println("$$$ creating Latin1Reader");
2193 }
2194 if (fTempByteBuffer == null) {
2195 fTempByteBuffer = fByteBufferPool.getBuffer();
2196 }
2197 return new Latin1Reader(inputStream, fTempByteBuffer);
2198 }
2199 if (DEBUG_ENCODINGS) {
2200 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
2201 if (javaEncoding == encoding) {
2202 System.out.print(" (IANA encoding)");
2203 }
2204 System.out.println();
2205 }
2206 return new InputStreamReader(inputStream, javaEncoding);
2207
2208 } // createReader(InputStream,String, Boolean): Reader
2209
2210 //
2211 // Protected static methods
2212 //
2213
2214 /**
2215 * Fixes a platform dependent filename to standard URI form.
2216 *
2217 * @param str The string to fix.
2218 *
2219 * @return Returns the fixed URI string.
2220 */
2221 protected static String fixURI(String str) {
2222
2223 // handle platform dependent strings
2224 str = str.replace(java.io.File.separatorChar, '/');
2225
2226 StringBuffer sb = null;
2227
2228 // Windows fix
2229 if (str.length() >= 2) {
2230 char ch1 = str.charAt(1);
2231 // change "C:blah" to "file:///C:blah"
2232 if (ch1 == ':') {
2233 char ch0 = Character.toUpperCase(str.charAt(0));
2234 if (ch0 >= 'A' && ch0 <= 'Z') {
2235 sb = new StringBuffer(str.length() + 8);
2236 sb.append("file:///");
2237 }
2238 }
2239 // change "//blah" to "file://blah"
2240 else if (ch1 == '/' && str.charAt(0) == '/') {
2241 sb = new StringBuffer(str.length() + 5);
2242 sb.append("file:");
2243 }
2244 }
2245
2246 int pos = str.indexOf(' ');
2247 // there is no space in the string
2248 // we just append "str" to the end of sb
2249 if (pos < 0) {
2250 if (sb != null) {
2251 sb.append(str);
2252 str = sb.toString();
2253 }
2254 }
2255 // otherwise, convert all ' ' to "%20".
2256 // Note: the following algorithm might not be very performant,
2257 // but people who want to use invalid URI's have to pay the price.
2258 else {
2259 if (sb == null)
2260 sb = new StringBuffer(str.length());
2261 // put characters before ' ' into the string buffer
2262 for (int i = 0; i < pos; i++)
2263 sb.append(str.charAt(i));
2264 // and %20 for the space
2265 sb.append("%20");
2266 // for the remamining part, also convert ' ' to "%20".
2267 for (int i = pos+1; i < str.length(); i++) {
2268 if (str.charAt(i) == ' ')
2269 sb.append("%20");
2270 else
2271 sb.append(str.charAt(i));
2272 }
2273 str = sb.toString();
2274 }
2275
2276 // done
2277 return str;
2278
2279 } // fixURI(String):String
2280
2281 //
2282 // Package visible methods
2283 //
2284
2285 /**
2286 * Returns the hashtable of declared entities.
2287 * <p>
2288 * <strong>REVISIT:</strong>
2289 * This should be done the "right" way by designing a better way to
2290 * enumerate the declared entities. For now, this method is needed
2291 * by the constructor that takes an XMLEntityManager parameter.
2292 */
2293 Hashtable getDeclaredEntities() {
2294 return fEntities;
2295 } // getDeclaredEntities():Hashtable
2296
2297 /** Prints the contents of the buffer. */
2298 static final void print(ScannedEntity currentEntity) {
2299 if (DEBUG_BUFFER) {
2300 if (currentEntity != null) {
2301 System.out.print('[');
2302 System.out.print(currentEntity.count);
2303 System.out.print(' ');
2304 System.out.print(currentEntity.position);
2305 if (currentEntity.count > 0) {
2306 System.out.print(" \"");
2307 for (int i = 0; i < currentEntity.count; i++) {
2308 if (i == currentEntity.position) {
2309 System.out.print('^');
2310 }
2311 char c = currentEntity.ch[i];
2312 switch (c) {
2313 case '\n': {
2314 System.out.print("\\n");
2315 break;
2316 }
2317 case '\r': {
2318 System.out.print("\\r");
2319 break;
2320 }
2321 case '\t': {
2322 System.out.print("\\t");
2323 break;
2324 }
2325 case '\\': {
2326 System.out.print("\\\\");
2327 break;
2328 }
2329 default: {
2330 System.out.print(c);
2331 }
2332 }
2333 }
2334 if (currentEntity.position == currentEntity.count) {
2335 System.out.print('^');
2336 }
2337 System.out.print('"');
2338 }
2339 System.out.print(']');
2340 System.out.print(" @ ");
2341 System.out.print(currentEntity.lineNumber);
2342 System.out.print(',');
2343 System.out.print(currentEntity.columnNumber);
2344 }
2345 else {
2346 System.out.print("*NO CURRENT ENTITY*");
2347 }
2348 }
2349 } // print(ScannedEntity)
2350
2351 //
2352 // Classes
2353 //
2354
2355 /**
2356 * Entity information.
2357 *
2358 * @xerces.internal
2359 *
2360 * @author Andy Clark, IBM
2361 */
2362 public static abstract class Entity {
2363
2364 //
2365 // Data
2366 //
2367
2368 /** Entity name. */
2369 public String name;
2370
2371 // whether this entity's declaration was found in the internal
2372 // or external subset
2373 public boolean inExternalSubset;
2374
2375 //
2376 // Constructors
2377 //
2378
2379 /** Default constructor. */
2380 public Entity() {
2381 clear();
2382 } // <init>()
2383
2384 /** Constructs an entity. */
2385 public Entity(String name, boolean inExternalSubset) {
2386 this.name = name;
2387 this.inExternalSubset = inExternalSubset;
2388 } // <init>(String)
2389
2390 //
2391 // Public methods
2392 //
2393
2394 /** Returns true if this entity was declared in the external subset. */
2395 public boolean isEntityDeclInExternalSubset () {
2396 return inExternalSubset;
2397 }
2398
2399 /** Returns true if this is an external entity. */
2400 public abstract boolean isExternal();
2401
2402 /** Returns true if this is an unparsed entity. */
2403 public abstract boolean isUnparsed();
2404
2405 /** Clears the entity. */
2406 public void clear() {
2407 name = null;
2408 inExternalSubset = false;
2409 } // clear()
2410
2411 /** Sets the values of the entity. */
2412 public void setValues(Entity entity) {
2413 name = entity.name;
2414 inExternalSubset = entity.inExternalSubset;
2415 } // setValues(Entity)
2416
2417 } // class Entity
2418
2419 /**
2420 * Internal entity.
2421 *
2422 * @xerces.internal
2423 *
2424 * @author Andy Clark, IBM
2425 */
2426 protected static class InternalEntity
2427 extends Entity {
2428
2429 //
2430 // Data
2431 //
2432
2433 /** Text value of entity. */
2434 public String text;
2435
2436 //
2437 // Constructors
2438 //
2439
2440 /** Default constructor. */
2441 public InternalEntity() {
2442 clear();
2443 } // <init>()
2444
2445 /** Constructs an internal entity. */
2446 public InternalEntity(String name, String text, boolean inExternalSubset) {
2447 super(name,inExternalSubset);
2448 this.text = text;
2449 } // <init>(String,String)
2450
2451 //
2452 // Entity methods
2453 //
2454
2455 /** Returns true if this is an external entity. */
2456 public final boolean isExternal() {
2457 return false;
2458 } // isExternal():boolean
2459
2460 /** Returns true if this is an unparsed entity. */
2461 public final boolean isUnparsed() {
2462 return false;
2463 } // isUnparsed():boolean
2464
2465 /** Clears the entity. */
2466 public void clear() {
2467 super.clear();
2468 text = null;
2469 } // clear()
2470
2471 /** Sets the values of the entity. */
2472 public void setValues(Entity entity) {
2473 super.setValues(entity);
2474 text = null;
2475 } // setValues(Entity)
2476
2477 /** Sets the values of the entity. */
2478 public void setValues(InternalEntity entity) {
2479 super.setValues(entity);
2480 text = entity.text;
2481 } // setValues(InternalEntity)
2482
2483 } // class InternalEntity
2484
2485 /**
2486 * External entity.
2487 *
2488 * @xerces.internal
2489 *
2490 * @author Andy Clark, IBM
2491 */
2492 protected static class ExternalEntity
2493 extends Entity {
2494
2495 //
2496 // Data
2497 //
2498
2499 /** container for all relevant entity location information. */
2500 public XMLResourceIdentifier entityLocation;
2501
2502 /** Notation name for unparsed entity. */
2503 public String notation;
2504
2505 //
2506 // Constructors
2507 //
2508
2509 /** Default constructor. */
2510 public ExternalEntity() {
2511 clear();
2512 } // <init>()
2513
2514 /** Constructs an internal entity. */
2515 public ExternalEntity(String name, XMLResourceIdentifier entityLocation,
2516 String notation, boolean inExternalSubset) {
2517 super(name,inExternalSubset);
2518 this.entityLocation = entityLocation;
2519 this.notation = notation;
2520 } // <init>(String,XMLResourceIdentifier, String)
2521
2522 //
2523 // Entity methods
2524 //
2525
2526 /** Returns true if this is an external entity. */
2527 public final boolean isExternal() {
2528 return true;
2529 } // isExternal():boolean
2530
2531 /** Returns true if this is an unparsed entity. */
2532 public final boolean isUnparsed() {
2533 return notation != null;
2534 } // isUnparsed():boolean
2535
2536 /** Clears the entity. */
2537 public void clear() {
2538 super.clear();
2539 entityLocation = null;
2540 notation = null;
2541 } // clear()
2542
2543 /** Sets the values of the entity. */
2544 public void setValues(Entity entity) {
2545 super.setValues(entity);
2546 entityLocation = null;
2547 notation = null;
2548 } // setValues(Entity)
2549
2550 /** Sets the values of the entity. */
2551 public void setValues(ExternalEntity entity) {
2552 super.setValues(entity);
2553 entityLocation = entity.entityLocation;
2554 notation = entity.notation;
2555 } // setValues(ExternalEntity)
2556
2557 } // class ExternalEntity
2558
2559 /**
2560 * Entity state.
2561 *
2562 * @xerces.internal
2563 *
2564 * @author Andy Clark, IBM
2565 */
2566 public class ScannedEntity
2567 extends Entity {
2568
2569 //
2570 // Data
2571 //
2572
2573 // i/o
2574
2575 /** Input stream. */
2576 public InputStream stream;
2577
2578 /** Reader. */
2579 public Reader reader;
2580
2581 // locator information
2582
2583 /** entity location information */
2584 public XMLResourceIdentifier entityLocation;
2585
2586 /** Line number. */
2587 public int lineNumber = 1;
2588
2589 /** Column number. */
2590 public int columnNumber = 1;
2591
2592 // encoding
2593
2594 /** Auto-detected encoding. */
2595 public String encoding;
2596
2597 /**
2598 * Encoding has been set externally, for example
2599 * using a SAX InputSource or a DOM LSInput.
2600 */
2601 boolean externallySpecifiedEncoding = false;
2602
2603 // version
2604
2605 /** XML version. **/
2606 public String xmlVersion = "1.0";
2607
2608 // status
2609
2610 /** True if in a literal. */
2611 public boolean literal;
2612
2613 // whether this is an external or internal scanned entity
2614 public boolean isExternal;
2615
2616 // buffer
2617
2618 /** Character buffer. */
2619 public char[] ch = null;
2620
2621 /** Position in character buffer. */
2622 public int position;
2623
2624 /** Base character offset for computing absolute character offset. */
2625 public int baseCharOffset;
2626
2627 /** Start position in character buffer. */
2628 public int startPosition;
2629
2630 /** Count of characters in buffer. */
2631 public int count;
2632
2633 // to allow the reader/inputStream to behave efficiently:
2634 public boolean mayReadChunks;
2635
2636 /** Character buffer container. */
2637 private CharacterBuffer fCharacterBuffer;
2638
2639 /** Byte buffer. */
2640 private byte [] fByteBuffer;
2641
2642 //
2643 // Constructors
2644 //
2645
2646 /** Constructs a scanned entity. */
2647 public ScannedEntity(String name,
2648 XMLResourceIdentifier entityLocation,
2649 InputStream stream, Reader reader, byte [] byteBuffer,
2650 String encoding, boolean literal, boolean mayReadChunks, boolean isExternal) {
2651 super(name,XMLEntityManager.this.fInExternalSubset);
2652 this.entityLocation = entityLocation;
2653 this.stream = stream;
2654 this.reader = reader;
2655 this.encoding = encoding;
2656 this.literal = literal;
2657 this.mayReadChunks = mayReadChunks;
2658 this.isExternal = isExternal;
2659 this.fCharacterBuffer = fCharacterBufferPool.getBuffer(isExternal);
2660 this.ch = fCharacterBuffer.ch;
2661 this.fByteBuffer = byteBuffer;
2662 } // <init>(StringXMLResourceIdentifier,InputStream,Reader,String,boolean, boolean)
2663
2664 //
2665 // Entity methods
2666 //
2667
2668 /** Returns true if this is an external entity. */
2669 public final boolean isExternal() {
2670 return isExternal;
2671 } // isExternal():boolean
2672
2673 /** Returns true if this is an unparsed entity. */
2674 public final boolean isUnparsed() {
2675 return false;
2676 } // isUnparsed():boolean
2677
2678 public void setReader(InputStream stream, String encoding, Boolean isBigEndian) throws IOException {
2679 fTempByteBuffer = fByteBuffer;
2680 reader = createReader(stream, encoding, isBigEndian);
2681 fByteBuffer = fTempByteBuffer;
2682 }
2683
2684 // return the expanded system ID of the
2685 // first external entity on the stack, null
2686 // otherwise.
2687 public String getExpandedSystemId() {
2688
2689 // search for the first external entity on the stack
2690 int size = fEntityStack.size();
2691 for (int i = size - 1; i >= 0; --i) {
2692 ScannedEntity externalEntity =
2693 (ScannedEntity)fEntityStack.elementAt(i);
2694
2695 if (externalEntity.entityLocation != null &&
2696 externalEntity.entityLocation.getExpandedSystemId() != null) {
2697 return externalEntity.entityLocation.getExpandedSystemId();
2698 }
2699 }
2700 return null;
2701 }
2702
2703 // return literal systemId of
2704 // nearest external entity
2705 public String getLiteralSystemId() {
2706 // search for the first external entity on the stack
2707 int size = fEntityStack.size();
2708 for (int i = size - 1; i >= 0; --i) {
2709 ScannedEntity externalEntity =
2710 (ScannedEntity)fEntityStack.elementAt(i);
2711
2712 if (externalEntity.entityLocation != null &&
2713 externalEntity.entityLocation.getLiteralSystemId() != null) {
2714 return externalEntity.entityLocation.getLiteralSystemId();
2715 }
2716 }
2717 return null;
2718 }
2719
2720 // return line number of position in most
2721 // recent external entity
2722 public int getLineNumber() {
2723 // search for the first external entity on the stack
2724 int size = fEntityStack.size();
2725 for (int i = size - 1; i >= 0 ; --i) {
2726 ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2727 if (firstExternalEntity.isExternal()) {
2728 return firstExternalEntity.lineNumber;
2729 }
2730 }
2731 return -1;
2732 }
2733
2734 // return column number of position in most
2735 // recent external entity
2736 public int getColumnNumber() {
2737 // search for the first external entity on the stack
2738 int size = fEntityStack.size();
2739 for (int i = size - 1; i >= 0; --i) {
2740 ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2741 if (firstExternalEntity.isExternal()) {
2742 return firstExternalEntity.columnNumber;
2743 }
2744 }
2745 return -1;
2746 }
2747
2748 // return character offset of position in most
2749 // recent external entity
2750 public int getCharacterOffset() {
2751 // search for the first external entity on the stack
2752 int size = fEntityStack.size();
2753 for (int i = size - 1; i >= 0; --i) {
2754 ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2755 if (firstExternalEntity.isExternal()) {
2756 return firstExternalEntity.baseCharOffset + (firstExternalEntity.position - firstExternalEntity.startPosition);
2757 }
2758 }
2759 return -1;
2760 }
2761
2762 // return encoding of most recent external entity
2763 public String getEncoding() {
2764 // search for the first external entity on the stack
2765 int size = fEntityStack.size();
2766 for (int i = size - 1; i >= 0; --i) {
2767 ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2768 if (firstExternalEntity.isExternal()) {
2769 return firstExternalEntity.encoding;
2770 }
2771 }
2772 return null;
2773 }
2774
2775 // return xml version of most recent external entity
2776 public String getXMLVersion() {
2777 // search for the first external entity on the stack
2778 int size = fEntityStack.size();
2779 for (int i = size - 1; i >= 0; --i) {
2780 ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2781 if (firstExternalEntity.isExternal()) {
2782 return firstExternalEntity.xmlVersion;
2783 }
2784 }
2785 return null;
2786 }
2787
2788 /** Returns whether the encoding of this entity was externally specified. **/
2789 public boolean isEncodingExternallySpecified() {
2790 return externallySpecifiedEncoding;
2791 }
2792
2793 /** Sets whether the encoding of this entity was externally specified. **/
2794 public void setEncodingExternallySpecified(boolean value) {
2795 externallySpecifiedEncoding = value;
2796 }
2797
2798 //
2799 // Object methods
2800 //
2801
2802 /** Returns a string representation of this object. */
2803 public String toString() {
2804
2805 StringBuffer str = new StringBuffer();
2806 str.append("name=\"").append(name).append('"');
2807 str.append(",ch=");
2808 str.append(ch);
2809 str.append(",position=").append(position);
2810 str.append(",count=").append(count);
2811 str.append(",baseCharOffset=").append(baseCharOffset);
2812 str.append(",startPosition=").append(startPosition);
2813 return str.toString();
2814
2815 } // toString():String
2816
2817 } // class ScannedEntity
2818
2819 /**
2820 * Pool of byte buffers for the java.io.Readers.
2821 *
2822 * @xerces.internal
2823 *
2824 * @author Michael Glavassevich, IBM
2825 */
2826 private static final class ByteBufferPool {
2827
2828 private static final int DEFAULT_POOL_SIZE = 3;
2829
2830 private int fPoolSize;
2831 private int fBufferSize;
2832 private byte[][] fByteBufferPool;
2833 private int fDepth;
2834
2835 public ByteBufferPool(int bufferSize) {
2836 this(DEFAULT_POOL_SIZE, bufferSize);
2837 }
2838
2839 public ByteBufferPool(int poolSize, int bufferSize) {
2840 fPoolSize = poolSize;
2841 fBufferSize = bufferSize;
2842 fByteBufferPool = new byte[fPoolSize][];
2843 fDepth = 0;
2844 }
2845
2846 /** Retrieves a byte buffer from the pool. **/
2847 public byte[] getBuffer() {
2848 return (fDepth > 0) ? fByteBufferPool[--fDepth] : new byte[fBufferSize];
2849 }
2850
2851 /** Returns byte buffer to pool. **/
2852 public void returnBuffer(byte[] buffer) {
2853 if (fDepth < fByteBufferPool.length) {
2854 fByteBufferPool[fDepth++] = buffer;
2855 }
2856 }
2857
2858 /** Sets the size of the buffers and dumps the old pool. **/
2859 public void setBufferSize(int bufferSize) {
2860 fBufferSize = bufferSize;
2861 fByteBufferPool = new byte[fPoolSize][];
2862 fDepth = 0;
2863 }
2864 }
2865
2866 /**
2867 * Buffer used in entity manager to reuse character arrays instead
2868 * of creating new ones every time.
2869 *
2870 * @xerces.internal
2871 *
2872 * @author Ankit Pasricha, IBM
2873 */
2874 private static final class CharacterBuffer {
2875
2876 /** character buffer */
2877 private final char[] ch;
2878
2879 /** whether the buffer is for an external or internal scanned entity */
2880 private final boolean isExternal;
2881
2882 public CharacterBuffer(boolean isExternal, int size) {
2883 this.isExternal = isExternal;
2884 ch = new char[size];
2885 }
2886 }
2887
2888 /**
2889 * Stores a number of character buffers and provides it to the entity
2890 * manager to use when an entity is seen.
2891 *
2892 * @xerces.internal
2893 *
2894 * @author Ankit Pasricha, IBM
2895 */
2896 private static final class CharacterBufferPool {
2897
2898 private static final int DEFAULT_POOL_SIZE = 3;
2899
2900 private CharacterBuffer[] fInternalBufferPool;
2901 private CharacterBuffer[] fExternalBufferPool;
2902
2903 private int fExternalBufferSize;
2904 private int fInternalBufferSize;
2905 private int fPoolSize;
2906
2907 private int fInternalTop;
2908 private int fExternalTop;
2909
2910 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
2911 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
2912 }
2913
2914 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
2915 fExternalBufferSize = externalBufferSize;
2916 fInternalBufferSize = internalBufferSize;
2917 fPoolSize = poolSize;
2918 init();
2919 }
2920
2921 /** Initializes buffer pool. **/
2922 private void init() {
2923 fInternalBufferPool = new CharacterBuffer[fPoolSize];
2924 fExternalBufferPool = new CharacterBuffer[fPoolSize];
2925 fInternalTop = -1;
2926 fExternalTop = -1;
2927 }
2928
2929 /** Retrieves buffer from pool. **/
2930 public CharacterBuffer getBuffer(boolean external) {
2931 if (external) {
2932 if (fExternalTop > -1) {
2933 return (CharacterBuffer)fExternalBufferPool[fExternalTop--];
2934 }
2935 else {
2936 return new CharacterBuffer(true, fExternalBufferSize);
2937 }
2938 }
2939 else {
2940 if (fInternalTop > -1) {
2941 return (CharacterBuffer)fInternalBufferPool[fInternalTop--];
2942 }
2943 else {
2944 return new CharacterBuffer(false, fInternalBufferSize);
2945 }
2946 }
2947 }
2948
2949 /** Returns buffer to pool. **/
2950 public void returnBuffer(CharacterBuffer buffer) {
2951 if (buffer.isExternal) {
2952 if (fExternalTop < fExternalBufferPool.length - 1) {
2953 fExternalBufferPool[++fExternalTop] = buffer;
2954 }
2955 }
2956 else if (fInternalTop < fInternalBufferPool.length - 1) {
2957 fInternalBufferPool[++fInternalTop] = buffer;
2958 }
2959 }
2960
2961 /** Sets the size of external buffers and dumps the old pool. **/
2962 public void setExternalBufferSize(int bufferSize) {
2963 fExternalBufferSize = bufferSize;
2964 fExternalBufferPool = new CharacterBuffer[fPoolSize];
2965 fExternalTop = -1;
2966 }
2967 }
2968
2969 /**
2970 * This class wraps the byte inputstreams we're presented with.
2971 * We need it because java.io.InputStreams don't provide
2972 * functionality to reread processed bytes, and they have a habit
2973 * of reading more than one character when you call their read()
2974 * methods. This means that, once we discover the true (declared)
2975 * encoding of a document, we can neither backtrack to read the
2976 * whole doc again nor start reading where we are with a new
2977 * reader.
2978 *
2979 * This class allows rewinding an inputStream by allowing a mark
2980 * to be set, and the stream reset to that position. <strong>The
2981 * class assumes that it needs to read one character per
2982 * invocation when it's read() method is inovked, but uses the
2983 * underlying InputStream's read(char[], offset length) method--it
2984 * won't buffer data read this way!</strong>
2985 *
2986 * @xerces.internal
2987 *
2988 * @author Neil Graham, IBM
2989 * @author Glenn Marcy, IBM
2990 */
2991 protected final class RewindableInputStream extends InputStream {
2992
2993 private InputStream fInputStream;
2994 private byte[] fData;
2995 private int fStartOffset;
2996 private int fEndOffset;
2997 private int fOffset;
2998 private int fLength;
2999 private int fMark;
3000
3001 public RewindableInputStream(InputStream is) {
3002 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
3003 fInputStream = is;
3004 fStartOffset = 0;
3005 fEndOffset = -1;
3006 fOffset = 0;
3007 fLength = 0;
3008 fMark = 0;
3009 }
3010
3011 public void setStartOffset(int offset) {
3012 fStartOffset = offset;
3013 }
3014
3015 public void rewind() {
3016 fOffset = fStartOffset;
3017 }
3018
3019 public int read() throws IOException {
3020 int b = 0;
3021 if (fOffset < fLength) {
3022 return fData[fOffset++] & 0xff;
3023 }
3024 if (fOffset == fEndOffset) {
3025 return -1;
3026 }
3027 if (fOffset == fData.length) {
3028 byte[] newData = new byte[fOffset << 1];
3029 System.arraycopy(fData, 0, newData, 0, fOffset);
3030 fData = newData;
3031 }
3032 b = fInputStream.read();
3033 if (b == -1) {
3034 fEndOffset = fOffset;
3035 return -1;
3036 }
3037 fData[fLength++] = (byte)b;
3038 fOffset++;
3039 return b & 0xff;
3040 }
3041
3042 public int read(byte[] b, int off, int len) throws IOException {
3043 int bytesLeft = fLength - fOffset;
3044 if (bytesLeft == 0) {
3045 if (fOffset == fEndOffset) {
3046 return -1;
3047 }
3048 // better get some more for the voracious reader...
3049 if(fCurrentEntity.mayReadChunks) {
3050 return fInputStream.read(b, off, len);
3051 }
3052 int returnedVal = read();
3053 if(returnedVal == -1) {
3054 fEndOffset = fOffset;
3055 return -1;
3056 }
3057 b[off] = (byte)returnedVal;
3058 return 1;
3059 }
3060 if (len < bytesLeft) {
3061 if (len <= 0) {
3062 return 0;
3063 }
3064 }
3065 else {
3066 len = bytesLeft;
3067 }
3068 if (b != null) {
3069 System.arraycopy(fData, fOffset, b, off, len);
3070 }
3071 fOffset += len;
3072 return len;
3073 }
3074
3075 public long skip(long n)
3076 throws IOException
3077 {
3078 int bytesLeft;
3079 if (n <= 0) {
3080 return 0;
3081 }
3082 bytesLeft = fLength - fOffset;
3083 if (bytesLeft == 0) {
3084 if (fOffset == fEndOffset) {
3085 return 0;
3086 }
3087 return fInputStream.skip(n);
3088 }
3089 if (n <= bytesLeft) {
3090 fOffset += n;
3091 return n;
3092 }
3093 fOffset += bytesLeft;
3094 if (fOffset == fEndOffset) {
3095 return bytesLeft;
3096 }
3097 n -= bytesLeft;
3098 /*
3099 * In a manner of speaking, when this class isn't permitting more
3100 * than one byte at a time to be read, it is "blocking". The
3101 * available() method should indicate how much can be read without
3102 * blocking, so while we're in this mode, it should only indicate
3103 * that bytes in its buffer are available; otherwise, the result of
3104 * available() on the underlying InputStream is appropriate.
3105 */
3106 return fInputStream.skip(n) + bytesLeft;
3107 }
3108
3109 public int available() throws IOException {
3110 int bytesLeft = fLength - fOffset;
3111 if (bytesLeft == 0) {
3112 if (fOffset == fEndOffset) {
3113 return -1;
3114 }
3115 return fCurrentEntity.mayReadChunks ? fInputStream.available()
3116 : 0;
3117 }
3118 return bytesLeft;
3119 }
3120
3121 public void mark(int howMuch) {
3122 fMark = fOffset;
3123 }
3124
3125 public void reset() {
3126 fOffset = fMark;
3127 }
3128
3129 public boolean markSupported() {
3130 return true;
3131 }
3132
3133 public void close() throws IOException {
3134 if (fInputStream != null) {
3135 fInputStream.close();
3136 fInputStream = null;
3137 }
3138 }
3139 } // end of RewindableInputStream class
3140
3141 } // class XMLEntityManager