Source code: org/apache/xerces/readers/XMLEntityHandler.java
1 /*
2 * The Apache Software License, Version 1.1
3 *
4 *
5 * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6 * reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Apache Software Foundation (http://www.apache.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Xerces" and "Apache Software Foundation" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact apache@apache.org.
31 *
32 * 5. Products derived from this software may not be called "Apache",
33 * nor may "Apache" appear in their name, without prior written
34 * permission of the Apache Software Foundation.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This software consists of voluntary contributions made by many
51 * individuals on behalf of the Apache Software Foundation and was
52 * originally based on software copyright (c) 1999, International
53 * Business Machines, Inc., http://www.apache.org. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58 package org.apache.xerces.readers;
59
60 import org.apache.xerces.framework.XMLErrorReporter;
61 import org.apache.xerces.utils.QName;
62 import org.apache.xerces.utils.StringPool;
63 import org.xml.sax.EntityResolver;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.Locator;
66 import java.io.InputStream;
67
68 /**
69 * This is the interface used for entity management. This interface
70 * is typically implemented by the "parser" class to provide entity
71 * management services for the scanner classes.
72 *
73 * @version $Id: XMLEntityHandler.java,v 1.3 2000/10/07 18:06:55 markd Exp $
74 */
75 public interface XMLEntityHandler extends Locator {
76
77 /**
78 * Special return values for scanCharRef method. The normal return
79 * value is a unicode character. These error conditions are defined
80 * using invalid XML unicode code points.
81 */
82 public static final int
83 CHARREF_RESULT_SEMICOLON_REQUIRED = -1,
84 CHARREF_RESULT_INVALID_CHAR = -2,
85 CHARREF_RESULT_OUT_OF_RANGE = -3;
86
87 /**
88 * Special return values for scanStringLiteral method. The normal
89 * return value is a StringPool handle. These error conditions are
90 * defined using invalid indices.
91 */
92 public static final int
93 STRINGLIT_RESULT_QUOTE_REQUIRED = -1,
94 STRINGLIT_RESULT_INVALID_CHAR = -2;
95
96 /**
97 * Special return values for scanAttValue method. The normal return
98 * value is a StringPool handle for a simple AttValue that was already
99 * correctly normalized for CDATA in the original document. These
100 * other return values either indicate an error or that the AttValue
101 * needs further processing.
102 */
103 public static final int
104 ATTVALUE_RESULT_COMPLEX = -1,
105 ATTVALUE_RESULT_LESSTHAN = -2,
106 ATTVALUE_RESULT_INVALID_CHAR = -3;
107
108 /**
109 * Special return values for scanEntityValue method. The normal return
110 * value is a StringPool handle for a simple EntityValue that was entirely
111 * contained within the original document. These other return values can
112 * either indicate an error or that the EntityValue needs further processing.
113 */
114 public static final int
115 ENTITYVALUE_RESULT_FINISHED = -1,
116 ENTITYVALUE_RESULT_REFERENCE = -2,
117 ENTITYVALUE_RESULT_PEREF = -3,
118 ENTITYVALUE_RESULT_INVALID_CHAR = -4,
119 ENTITYVALUE_RESULT_END_OF_INPUT = -5;
120
121 /**
122 * Return values for the scanContent method.
123 */
124 public static final int
125 CONTENT_RESULT_START_OF_PI = 0,
126 CONTENT_RESULT_START_OF_COMMENT = 1,
127 CONTENT_RESULT_START_OF_CDSECT = 2,
128 CONTENT_RESULT_END_OF_CDSECT = 3,
129 CONTENT_RESULT_START_OF_ETAG = 4,
130 CONTENT_RESULT_MATCHING_ETAG = 5,
131 CONTENT_RESULT_START_OF_ELEMENT = 6,
132 CONTENT_RESULT_START_OF_CHARREF = 7,
133 CONTENT_RESULT_START_OF_ENTITYREF = 8,
134 CONTENT_RESULT_INVALID_CHAR = 9,
135 CONTENT_RESULT_MARKUP_NOT_RECOGNIZED = 10,
136 CONTENT_RESULT_MARKUP_END_OF_INPUT = 11,
137 CONTENT_RESULT_REFERENCE_END_OF_INPUT = 12;
138
139 /**
140 * This is an enumeration of all the defined entity types.
141 * These are provided to communicate state information to
142 * the clients of the parser.
143 */
144 public static final int
145 ENTITYTYPE_INTERNAL_PE = 0,
146 ENTITYTYPE_EXTERNAL_PE = 1,
147 ENTITYTYPE_INTERNAL = 2,
148 ENTITYTYPE_EXTERNAL = 3,
149 ENTITYTYPE_UNPARSED = 4,
150 ENTITYTYPE_DOCUMENT = 5,
151 ENTITYTYPE_EXTERNAL_SUBSET = 6;
152
153 /**
154 * This is an enumeration of all the defined contexts in which
155 * an entity reference may appear. The order is important, as
156 * all explicit general entity references must appear first and
157 * the last of these must be ENTITYREF_IN_CONTENT. This permits
158 * the test "(context <= ENTITYREF_IN_CONTENT)" to be used as a
159 * quick check for a general entity reference.
160 *
161 * @see #startReadingFromEntity
162 */
163 public static final int
164 ENTITYREF_IN_ATTVALUE = 0,
165 ENTITYREF_IN_DEFAULTATTVALUE = 1,
166 ENTITYREF_IN_CONTENT = 2,
167 ENTITYREF_IN_DTD_AS_MARKUP = 3,
168 ENTITYREF_IN_ENTITYVALUE = 4,
169 ENTITYREF_IN_DTD_WITHIN_MARKUP = 5,
170 ENTITYREF_DOCUMENT = 6,
171 ENTITYREF_EXTERNAL_SUBSET = 7;
172
173 /**
174 * Start reading document from an InputSource.
175 *
176 * @param source The input source for the document to process.
177 * @return <code>true</code> if we were able to open the document source;
178 * <code>false</code> otherwise.
179 * @exception java.lang.Exception
180 */
181 public boolean startReadingFromDocument(InputSource source) throws Exception;
182
183 /**
184 * Start reading from this entity.
185 *
186 * Note that the reader depth is not used by the reader, but is made
187 * available so that it may be retrieved at end of input to test that
188 * gramatical structures are properly nested within entities.
189 *
190 * @param entityName The entity name handle in the string pool.
191 * @param readerDepth The depth to associate with the reader for this entity.
192 * @param context The context of the entity reference; see ENTITYREF_IN_*.
193 * @return <code>true</code> if the entity might start with a TextDecl;
194 * <code>false</code> otherwise.
195 * @exception java.lang.Exception
196 */
197 public boolean startReadingFromEntity(int entityName, int readerDepth, int entityContext) throws Exception;
198
199 /**
200 * Expand the system identifier relative to the entity that we are processing.
201 *
202 * @return The expanded system identifier.
203 */
204 public String expandSystemId(String systemId);
205
206 /**
207 * DTD specific entity handler
208 */
209 public interface DTDHandler {
210 /**
211 * Start reading from the external subset of the DTD.
212 *
213 * @param publicId The public identifier for the external subset.
214 * @param systemId The system identifier for the external subset.
215 * @param readerDepth The depth to associate with the reader for the external subset.
216 * @exception java.lang.Exception
217 */
218 public void startReadingFromExternalSubset(String publicId, String systemId, int readerDepth) throws Exception;
219
220 /**
221 * Finished reading from the external subset of the DTD.
222 * @exception java.lang.Exception
223 */
224 public void stopReadingFromExternalSubset() throws Exception;
225
226 /**
227 * Start the scope of an entity declaration.
228 *
229 * @return <code>true</code> on success; otherwise
230 * <code>false</code> if the entity declaration is recursive.
231 * @exception java.lang.Exception
232 */
233 public boolean startEntityDecl(boolean isPE, int entityName) throws Exception;
234
235 /**
236 * End the scope of an entity declaration.
237 * @exception java.lang.Exception
238 */
239 public void endEntityDecl() throws Exception;
240
241 /**
242 * Declare entities and notations.
243 */
244 public int addInternalPEDecl(int entityName, int value, boolean isExternal) throws Exception;
245 public int addExternalPEDecl(int entityName, int publicId, int systemId, boolean isExternal) throws Exception;
246 public int addInternalEntityDecl(int entityName, int value, boolean isExternal) throws Exception;
247 public int addExternalEntityDecl(int entityName, int publicId, int systemId, boolean isExternal) throws Exception;
248 public int addUnparsedEntityDecl(int entityName, int publicId, int systemId, int notationName, boolean isExternal) throws Exception;
249 public int addNotationDecl(int notationName, int publicId, int systemId, boolean isExternal) throws Exception;
250
251 /**
252 * Check for unparsed entity.
253 *
254 * @param entityName The string handle for the entity name.
255 * @return <code>true</code> if entityName is an unparsed entity; otherwise
256 * <code>false</code> if entityName is not declared or not an unparsed entity.
257 */
258 public boolean isUnparsedEntity(int entityName);
259
260 /**
261 * Check for declared notation.
262 *
263 * @param notationName The string handle for the notation name.
264 * @return <code>true</code> if notationName is a declared notation; otherwise
265 * <code>false</code> if notationName is not declared.
266 */
267 public boolean isNotationDeclared(int entityName);
268
269 /**
270 * Remember a required but undeclared notation.
271 */
272 public void addRequiredNotation(int notationName, Locator locator, int majorCode, int minorCode, Object[] args);
273
274 /**
275 * Check required but undeclared notations.
276 */
277 public void checkRequiredNotations() throws Exception;
278 }
279
280 /**
281 * Return a unique identifier for the current reader.
282 */
283 public int getReaderId();
284
285 /**
286 * Set the depth for the current reader.
287 */
288 public void setReaderDepth(int depth);
289
290 /**
291 * Return the depth set for the current reader.
292 */
293 public int getReaderDepth();
294
295 /**
296 * Return the current reader.
297 */
298 public EntityReader getEntityReader();
299
300 /**
301 * This method is called by the reader subclasses at the
302 * end of input.
303 *
304 * @return The reader to use next.
305 * @exception java.lang.Exception
306 */
307 public EntityReader changeReaders() throws Exception;
308
309 /**
310 * This interface is used to store and retrieve character
311 * sequences. The primary use is for a literal data buffer
312 * where we can construct the values for literal entity
313 * replacement text. When all of the characters for the
314 * replacement text have been added to the buffer, the
315 * contents are added to the string pool for later use
316 * in constructing a StringReader if the entity is referenced.
317 */
318 public interface CharBuffer {
319 /**
320 * Append a character to this buffer.
321 *
322 * @param ch The character.
323 */
324 public void append(char ch);
325
326 /**
327 * Append characters to this buffer.
328 *
329 * @param chars The char array containing the characters.
330 * @param offset The offset within the char array of the first character to append.
331 * @param length The number of characters to append.
332 */
333 public void append(char[] chars, int offset, int length);
334
335 /**
336 * Get the current length of the buffer. This is also the
337 * offset of the next character that is added to the buffer.
338 *
339 * @return The length of the buffer.
340 */
341 public int length();
342
343 /**
344 * Add a region of this buffer to the string pool.
345 *
346 * @param offset The offset within this buffer of the first character of the string.
347 * @param length The number of characters in the string.
348 * @return The <code>StringPool</code> handle of the string.
349 */
350 public int addString(int offset, int length);
351 }
352
353 /**
354 * Set the character data handler.
355 */
356 public void setCharDataHandler(XMLEntityHandler.CharDataHandler charDataHandler);
357
358 /**
359 * Get the character data handler.
360 */
361 public XMLEntityHandler.CharDataHandler getCharDataHandler();
362
363 /**
364 * Interface for passing character data.
365 */
366 public interface CharDataHandler {
367 /**
368 * Process character data, character array version
369 *
370 * @param chars character buffer to be processed
371 * @param offset offset in buffer where the data starts
372 * @param length length of characters to be processed
373 * @exception java.lang.Exception
374 */
375 public void processCharacters(char[] chars, int offset, int length) throws Exception;
376
377 /**
378 * Process character data, <code>StringPool</code> handle version
379 *
380 * @param stringHandle <code>StringPool</code> handle to the character data
381 * @exception java.lang.Exception
382 */
383 public void processCharacters(int stringHandle) throws Exception;
384
385 /**
386 * Process white space data, character array version
387 *
388 * @param chars character buffer to be processed
389 * @param offset offset in buffer where the data starts
390 * @param length length of whitespace to be processed
391 * @exception java.lang.Exception
392 */
393 public void processWhitespace(char[] chars, int offset, int length) throws Exception;
394
395 /**
396 * Process white space data, <code>StringPool</code> handle version
397 *
398 * @param stringHandle <code>StringPool</code> handle to the whitespace
399 * @exception java.lang.Exception
400 */
401 public void processWhitespace(int stringHandle) throws Exception;
402 }
403
404 /**
405 * This is the interface for scanners to process input data
406 * from entities without needing to know the details of the
407 * underlying storage of those entities, or their encodings.
408 *
409 * The methods in this interface have been refined over time
410 * to a rough balance between keeping the XML grammar dependent
411 * code within the scanner classes, and allowing high performance
412 * processing of XML documents.
413 */
414 public interface EntityReader {
415 /**
416 * Return the current offset within this reader.
417 *
418 * @return The offset.
419 */
420 public int currentOffset();
421
422 /**
423 * Return the line number of the current position within the document that we are processing.
424 *
425 * @return The current line number.
426 */
427 public int getLineNumber();
428
429 /**
430 * Return the column number of the current position within the document that we are processing.
431 *
432 * @return The current column number.
433 */
434 public int getColumnNumber();
435
436 /**
437 * This method is provided for scanner implementations.
438 */
439 public void setInCDSect(boolean inCDSect);
440
441 /**
442 * This method is provided for scanner implementations.
443 */
444 public boolean getInCDSect();
445
446 /**
447 * Append the characters processed by this reader associated with <code>offset</code> and
448 * <code>length</code> to the <code>CharBuffer</code>.
449 *
450 * @param charBuffer The <code>CharBuffer</code> to append the characters to.
451 * @param offset The offset within this reader where the copy should start.
452 * @param length The length within this reader where the copy should stop.
453 */
454 public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length);
455
456 /**
457 * Add a string to the <code>StringPool</code> from the characters scanned using this
458 * reader as described by <code>offset</code> and <code>length</code>.
459 *
460 * @param offset The offset within this reader where the characters start.
461 * @param length The length within this reader where the characters end.
462 * @return The <code>StringPool</code> handle for the string.
463 */
464 public int addString(int offset, int length);
465
466 /**
467 * Add a symbol to the <code>StringPool</code> from the characters scanned using this
468 * reader as described by <code>offset</code> and <code>length</code>.
469 *
470 * @param offset The offset within this reader where the characters start.
471 * @param length The length within this reader where the characters end.
472 * @return The <code>StringPool</code> handle for the symbol.
473 */
474 public int addSymbol(int offset, int length);
475
476 /**
477 * Test that the current character is a <code>ch</code> character.
478 *
479 * @param ch The character to match against.
480 * @param skipPastChar If <code>true</code>, we advance past the matched character.
481 * @return <code>true</code> if the current character is a <code>ch</code> character;
482 * <code>false</code> otherwise.
483 * @exception java.lang.Exception
484 */
485 public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception;
486
487 /**
488 * Test that the current character is valid.
489 *
490 * @param skipPastChar If <code>true</code>, we advance past the valid character.
491 * @return <code>true</code> if the current character is valid;
492 * <code>false</code> otherwise.
493 * @exception java.lang.Exception
494 */
495 public boolean lookingAtValidChar(boolean skipPastChar) throws Exception;
496
497 /**
498 * Test that the current character is a whitespace character.
499 *
500 * @param skipPastChar If <code>true</code>, we advance past the whitespace character.
501 * @return <code>true</code> if the current character is whitespace;
502 * <code>false</code> otherwise.
503 * @exception java.lang.Exception
504 */
505 public boolean lookingAtSpace(boolean skipPastChar) throws Exception;
506
507 /**
508 * Advance through the input data up to the next <code>ch</code> character.
509 *
510 * @param ch The character to search for.
511 * @exception java.lang.Exception
512 */
513 public void skipToChar(char ch) throws Exception;
514
515 /**
516 * Skip past whitespace characters starting at the current position.
517 * @exception java.lang.Exception
518 */
519 public void skipPastSpaces() throws Exception;
520
521 /**
522 * Skip past a sequence of characters that match the XML definition of a Name.
523 * @exception java.lang.Exception
524 */
525 public void skipPastName(char fastcheck) throws Exception;
526
527 /**
528 * Skip past a sequence of characters that match the XML definition of an Nmtoken.
529 * @exception java.lang.Exception
530 */
531 public void skipPastNmtoken(char fastcheck) throws Exception;
532
533 /**
534 * Skip past a sequence of characters that matches the specified character array.
535 *
536 * @param s The characters to match.
537 * @return <code>true</code> if the current character is valid;
538 * <code>false</code> otherwise.
539 * @exception java.lang.Exception
540 */
541 public boolean skippedString(char[] s) throws Exception;
542
543 /**
544 * Scan an invalid character.
545 *
546 * @return The invalid character as an integer, or -1 if there was a bad encoding.
547 * @exception java.lang.Exception
548 */
549 public int scanInvalidChar() throws Exception;
550
551 /**
552 * Scan a character reference.
553 *
554 * @return The value of the character, or one of the following error codes:
555 *
556 * CHARREF_RESULT_SEMICOLON_REQUIRED
557 * CHARREF_RESULT_INVALID_CHAR
558 * CHARREF_RESULT_OUT_OF_RANGE
559 * @exception java.lang.Exception
560 */
561 public int scanCharRef(boolean isHexadecimal) throws Exception;
562
563 /**
564 * Scan a string literal.
565 *
566 * @return The <code>StringPool</code> handle for the string that
567 * was scanned, or one of the following error codes:
568 *
569 * STRINGLIT_RESULT_QUOTE_REQUIRED
570 * STRINGLIT_RESULT_INVALID_CHAR
571 * @exception java.lang.Exception
572 */
573 public int scanStringLiteral() throws Exception;
574
575 /**
576 * Scan an attribute value.
577 *
578 * @param qchar The initial quote character, either a single or double quote.
579 * @return The <code>StringPool</code> handle for the string that
580 * was scanned, or one of the following error codes:
581 *
582 * ATTVALUE_RESULT_COMPLEX
583 * ATTVALUE_RESULT_LESSTHAN
584 * ATTVALUE_RESULT_INVALID_CHAR
585 * @exception java.lang.Exception
586 */
587 public int scanAttValue(char qchar, boolean asSymbol) throws Exception;
588
589 /**
590 * Scan an entity value.
591 *
592 * @param qchar The initial quote character, either a single or double quote.
593 * @return The <code>StringPool</code> handle for the string that
594 * was scanned, or one of the following error codes:
595 *
596 * ENTITYVALUE_RESULT_FINISHED
597 * ENTITYVALUE_RESULT_REFERENCE
598 * ENTITYVALUE_RESULT_PEREF
599 * ENTITYVALUE_RESULT_INVALID_CHAR
600 * ENTITYVALUE_RESULT_END_OF_INPUT
601 * @exception java.lang.Exception
602 */
603 public int scanEntityValue(int qchar, boolean createString) throws Exception;
604
605 /**
606 * Add a sequence of characters that match the XML definition of a Name to the <code>StringPool</code>.
607 *
608 * If we find a name at the current position we will add it to the <code>StringPool</code>
609 * as a symbol and will return the string pool handle for that symbol to the caller.
610 *
611 * @param fastcheck A character that is not a legal name character that is provided as a
612 * hint to the reader of a character likely to terminate the Name.
613 * @return The <code>StringPool</code> handle for the name that was scanned,
614 * or -1 if a name was not found at the current position within the input data.
615 * @exception java.lang.Exception
616 */
617 public int scanName(char fastcheck) throws Exception;
618
619 /**
620 * Scan the name that is expected at the current position in the document.
621 *
622 * This method is invoked when we are scanning the element type in an end tag
623 * that must match the element type in the corresponding start tag.
624 *
625 * @param fastcheck A character that is not a legal name character that is provided as a
626 * hint to the reader of a character likely to terminate the Name.
627 * @param expectedName The characters of the name we expect.
628 * @return <code>true</code> if we scanned the name we expected to find; otherwise
629 * <code>false</code> if we did not.
630 * @exception java.lang.Exception
631 */
632 public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception;
633
634 /**
635 * Add a sequence of characters that match the XML Namespaces definition of a QName
636 * to the <code>StringPool</code>.
637 *
638 * If we find a QName at the current position we will add it to the <code>StringPool</code>
639 * and will return the string pool handle of that QName to the caller.
640 *
641 * @param fastcheck A character that is not a legal name character that is provided as a
642 * hint to the reader of a character likely to terminate the Name.
643 * <!--
644 * @return The <code>StringPool</code> handle for the QName that was scanned,
645 * or -1 if a name was not found at the current position within the input data.
646 * -->
647 * @exception java.lang.Exception
648 */
649 public void scanQName(char fastcheck, QName qname) throws Exception;
650
651 /**
652 * Skip through the input while we are looking at character data.
653 *
654 * @param elementType The element type handle in the StringPool.
655 * @return One of the following result codes:
656 *
657 * CONTENT_RESULT_START_OF_PI
658 * CONTENT_RESULT_START_OF_COMMENT
659 * CONTENT_RESULT_START_OF_CDSECT
660 * CONTENT_RESULT_END_OF_CDSECT
661 * CONTENT_RESULT_START_OF_ETAG
662 * CONTENT_RESULT_MATCHING_ETAG
663 * CONTENT_RESULT_START_OF_ELEMENT
664 * CONTENT_RESULT_START_OF_CHARREF
665 * CONTENT_RESULT_START_OF_ENTITYREF
666 * CONTENT_RESULT_INVALID_CHAR
667 * CONTENT_RESULT_MARKUP_NOT_RECOGNIZED
668 * CONTENT_RESULT_MARKUP_END_OF_INPUT
669 * CONTENT_RESULT_REFERENCE_END_OF_INPUT
670 * @exception java.lang.Exception
671 */
672 public int scanContent(QName element) throws Exception;
673 }
674 }