1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl;
19
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.util.Locale;
23
24 import org.apache.xerces.impl.io.UCSReader;
25 import org.apache.xerces.impl.msg.XMLMessageFormatter;
26 import org.apache.xerces.util.SymbolTable;
27 import org.apache.xerces.util.XMLChar;
28 import org.apache.xerces.util.XMLStringBuffer;
29 import org.apache.xerces.xni.QName;
30 import org.apache.xerces.xni.XMLLocator;
31 import org.apache.xerces.xni.XMLString;
32
33 /**
34 * Implements the entity scanner methods.
35 *
36 * @xerces.internal
37 *
38 * @author Andy Clark, IBM
39 * @author Neil Graham, IBM
40 * @version $Id: XMLEntityScanner.java 568411 2007-08-22 04:34:13Z mrglavas $
41 */
42 public class XMLEntityScanner implements XMLLocator {
43
// constants

/** When true, setEncoding traces its reader decisions to System.out. */
private static final boolean DEBUG_ENCODINGS = false;

/** When true, the scan methods trace the entity buffer state to System.out on entry and exit. */
private static final boolean DEBUG_BUFFER = false;

/**
 * To signal the end of the document entity, this exception will be thrown.
 * <p>
 * A single shared instance is used. Its fillInStackTrace override returns
 * the instance without delegating to the superclass, so no stack trace is
 * recorded for it.
 */
private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
    private static final long serialVersionUID = 980337771224675268L;
    public Throwable fillInStackTrace() {
        // Deliberately skip the superclass implementation.
        return this;
    }
};

//
// Data
//

/** Owning entity manager. NOTE(review): not referenced by the methods visible in this part of the file. */
private XMLEntityManager fEntityManager = null;

/** The entity whose character buffer (ch, position, count) the scan methods read and advance. */
protected XMLEntityManager.ScannedEntity fCurrentEntity = null;

/** Symbol table through which scanned names are returned (via addSymbol). */
protected SymbolTable fSymbolTable = null;

/** Buffer size, initialised from XMLEntityManager.DEFAULT_BUFFER_SIZE. */
protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

/**
 * Error reporter. This property identifier is:
 * http://apache.org/xml/properties/internal/error-reporter
 * <p>
 * Used by scanQName to report an illegal qualified name.
 */
protected XMLErrorReporter fErrorReporter;
74 //
75 // Constructors
76 //
77
/**
 * Default constructor. Performs no initialisation of its own; every field
 * keeps the value given by its declaration.
 */
public XMLEntityScanner() {
} // <init>()
81
82 //
83 // XMLEntityScanner methods
84 //
85
86 /**
87 * Returns the base system identifier of the currently scanned
88 * entity, or null if none is available.
89 */
90 public final String getBaseSystemId() {
91 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
92 } // getBaseSystemId():String
93
94 /**
95 * Sets the encoding of the scanner. This method is used by the
96 * scanners if the XMLDecl or TextDecl line contains an encoding
97 * pseudo-attribute.
98 * <p>
99 * <strong>Note:</strong> The underlying character reader on the
100 * current entity will be changed to accomodate the new encoding.
101 * However, the new encoding is ignored if the current reader was
102 * not constructed from an input stream (e.g. an external entity
103 * that is resolved directly to the appropriate java.io.Reader
104 * object).
105 *
106 * @param encoding The IANA encoding name of the new encoding.
107 *
108 * @throws IOException Thrown if the new encoding is not supported.
109 *
110 * @see org.apache.xerces.util.EncodingMap
111 */
112 public final void setEncoding(String encoding) throws IOException {
113
114 if (DEBUG_ENCODINGS) {
115 System.out.println("$$$ setEncoding: "+encoding);
116 }
117
118 if (fCurrentEntity.stream != null) {
119 // if the encoding is the same, don't change the reader and
120 // re-use the original reader used by the OneCharReader
121 // NOTE: Besides saving an object, this overcomes deficiencies
122 // in the UTF-16 reader supplied with the standard Java
123 // distribution (up to and including 1.3). The UTF-16
124 // decoder buffers 8K blocks even when only asked to read
125 // a single char! -Ac
126 if (fCurrentEntity.encoding == null ||
127 !fCurrentEntity.encoding.equals(encoding)) {
128 // UTF-16 is a bit of a special case. If the encoding is UTF-16,
129 // and we know the endian-ness, we shouldn't change readers.
130 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
131 // the endian-ness from the encoding we presently have.
132 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
133 String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
134 if(ENCODING.equals("UTF-16")) return;
135 if(ENCODING.equals("ISO-10646-UCS-4")) {
136 if(fCurrentEntity.encoding.equals("UTF-16BE")) {
137 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
138 } else {
139 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
140 }
141 return;
142 }
143 if(ENCODING.equals("ISO-10646-UCS-2")) {
144 if(fCurrentEntity.encoding.equals("UTF-16BE")) {
145 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
146 } else {
147 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
148 }
149 return;
150 }
151 }
152 // wrap a new reader around the input stream, changing
153 // the encoding
154 if (DEBUG_ENCODINGS) {
155 System.out.println("$$$ creating new reader from stream: "+
156 fCurrentEntity.stream);
157 }
158 //fCurrentEntity.stream.reset();
159 fCurrentEntity.setReader(fCurrentEntity.stream, encoding, null);
160 fCurrentEntity.encoding = encoding;
161 } else {
162 if (DEBUG_ENCODINGS)
163 System.out.println("$$$ reusing old reader on stream");
164 }
165 }
166
167 } // setEncoding(String)
168
169 /**
170 * Sets the XML version. This method is used by the
171 * scanners to report the value of the version pseudo-attribute
172 * in an XML or text declaration.
173 *
174 * @param xmlVersion the XML version of the current entity
175 */
176 public final void setXMLVersion(String xmlVersion) {
177 fCurrentEntity.xmlVersion = xmlVersion;
178 } // setXMLVersion(String)
179
180 /** Returns true if the current entity being scanned is external. */
181 public final boolean isExternal() {
182 return fCurrentEntity.isExternal();
183 } // isExternal():boolean
184
185 /**
186 * Returns the next character on the input.
187 * <p>
188 * <strong>Note:</strong> The character is <em>not</em> consumed.
189 *
190 * @throws IOException Thrown if i/o error occurs.
191 * @throws EOFException Thrown on end of file.
192 */
193 public int peekChar() throws IOException {
194 if (DEBUG_BUFFER) {
195 System.out.print("(peekChar: ");
196 XMLEntityManager.print(fCurrentEntity);
197 System.out.println();
198 }
199
200 // load more characters, if needed
201 if (fCurrentEntity.position == fCurrentEntity.count) {
202 load(0, true);
203 }
204
205 // peek at character
206 int c = fCurrentEntity.ch[fCurrentEntity.position];
207
208 // return peeked character
209 if (DEBUG_BUFFER) {
210 System.out.print(")peekChar: ");
211 XMLEntityManager.print(fCurrentEntity);
212 if (fCurrentEntity.isExternal()) {
213 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
214 }
215 else {
216 System.out.println(" -> '"+(char)c+"'");
217 }
218 }
219 if (fCurrentEntity.isExternal()) {
220 return c != '\r' ? c : '\n';
221 }
222 else {
223 return c;
224 }
225
226 } // peekChar():int
227
228 /**
229 * Returns the next character on the input.
230 * <p>
231 * <strong>Note:</strong> The character is consumed.
232 *
233 * @throws IOException Thrown if i/o error occurs.
234 * @throws EOFException Thrown on end of file.
235 */
236 public int scanChar() throws IOException {
237 if (DEBUG_BUFFER) {
238 System.out.print("(scanChar: ");
239 XMLEntityManager.print(fCurrentEntity);
240 System.out.println();
241 }
242
243 // load more characters, if needed
244 if (fCurrentEntity.position == fCurrentEntity.count) {
245 load(0, true);
246 }
247
248 // scan character
249 int c = fCurrentEntity.ch[fCurrentEntity.position++];
250 boolean external = false;
251 if (c == '\n' ||
252 (c == '\r' && (external = fCurrentEntity.isExternal()))) {
253 fCurrentEntity.lineNumber++;
254 fCurrentEntity.columnNumber = 1;
255 if (fCurrentEntity.position == fCurrentEntity.count) {
256 fCurrentEntity.ch[0] = (char)c;
257 load(1, false);
258 }
259 if (c == '\r' && external) {
260 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
261 fCurrentEntity.position--;
262 }
263 c = '\n';
264 }
265 }
266
267 // return character that was scanned
268 if (DEBUG_BUFFER) {
269 System.out.print(")scanChar: ");
270 XMLEntityManager.print(fCurrentEntity);
271 System.out.println(" -> '"+(char)c+"'");
272 }
273 fCurrentEntity.columnNumber++;
274 return c;
275
276 } // scanChar():int
277
278 /**
279 * Returns a string matching the NMTOKEN production appearing immediately
280 * on the input as a symbol, or null if NMTOKEN Name string is present.
281 * <p>
282 * <strong>Note:</strong> The NMTOKEN characters are consumed.
283 * <p>
284 * <strong>Note:</strong> The string returned must be a symbol. The
285 * SymbolTable can be used for this purpose.
286 *
287 * @throws IOException Thrown if i/o error occurs.
288 * @throws EOFException Thrown on end of file.
289 *
290 * @see org.apache.xerces.util.SymbolTable
291 * @see org.apache.xerces.util.XMLChar#isName
292 */
293 public String scanNmtoken() throws IOException {
294 if (DEBUG_BUFFER) {
295 System.out.print("(scanNmtoken: ");
296 XMLEntityManager.print(fCurrentEntity);
297 System.out.println();
298 }
299
300 // load more characters, if needed
301 if (fCurrentEntity.position == fCurrentEntity.count) {
302 load(0, true);
303 }
304
305 // scan nmtoken
306 int offset = fCurrentEntity.position;
307 while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
308 if (++fCurrentEntity.position == fCurrentEntity.count) {
309 int length = fCurrentEntity.position - offset;
310 if (length == fCurrentEntity.ch.length) {
311 // bad luck we have to resize our buffer
312 char[] tmp = new char[fCurrentEntity.ch.length << 1];
313 System.arraycopy(fCurrentEntity.ch, offset,
314 tmp, 0, length);
315 fCurrentEntity.ch = tmp;
316 }
317 else {
318 System.arraycopy(fCurrentEntity.ch, offset,
319 fCurrentEntity.ch, 0, length);
320 }
321 offset = 0;
322 if (load(length, false)) {
323 break;
324 }
325 }
326 }
327 int length = fCurrentEntity.position - offset;
328 fCurrentEntity.columnNumber += length;
329
330 // return nmtoken
331 String symbol = null;
332 if (length > 0) {
333 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
334 }
335 if (DEBUG_BUFFER) {
336 System.out.print(")scanNmtoken: ");
337 XMLEntityManager.print(fCurrentEntity);
338 System.out.println(" -> "+String.valueOf(symbol));
339 }
340 return symbol;
341
342 } // scanNmtoken():String
343
344 /**
345 * Returns a string matching the Name production appearing immediately
346 * on the input as a symbol, or null if no Name string is present.
347 * <p>
348 * <strong>Note:</strong> The Name characters are consumed.
349 * <p>
350 * <strong>Note:</strong> The string returned must be a symbol. The
351 * SymbolTable can be used for this purpose.
352 *
353 * @throws IOException Thrown if i/o error occurs.
354 * @throws EOFException Thrown on end of file.
355 *
356 * @see org.apache.xerces.util.SymbolTable
357 * @see org.apache.xerces.util.XMLChar#isName
358 * @see org.apache.xerces.util.XMLChar#isNameStart
359 */
360 public String scanName() throws IOException {
361 if (DEBUG_BUFFER) {
362 System.out.print("(scanName: ");
363 XMLEntityManager.print(fCurrentEntity);
364 System.out.println();
365 }
366
367 // load more characters, if needed
368 if (fCurrentEntity.position == fCurrentEntity.count) {
369 load(0, true);
370 }
371
372 // scan name
373 int offset = fCurrentEntity.position;
374 if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
375 if (++fCurrentEntity.position == fCurrentEntity.count) {
376 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
377 offset = 0;
378 if (load(1, false)) {
379 fCurrentEntity.columnNumber++;
380 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
381 if (DEBUG_BUFFER) {
382 System.out.print(")scanName: ");
383 XMLEntityManager.print(fCurrentEntity);
384 System.out.println(" -> "+String.valueOf(symbol));
385 }
386 return symbol;
387 }
388 }
389 while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
390 if (++fCurrentEntity.position == fCurrentEntity.count) {
391 int length = fCurrentEntity.position - offset;
392 if (length == fCurrentEntity.ch.length) {
393 // bad luck we have to resize our buffer
394 char[] tmp = new char[fCurrentEntity.ch.length << 1];
395 System.arraycopy(fCurrentEntity.ch, offset,
396 tmp, 0, length);
397 fCurrentEntity.ch = tmp;
398 }
399 else {
400 System.arraycopy(fCurrentEntity.ch, offset,
401 fCurrentEntity.ch, 0, length);
402 }
403 offset = 0;
404 if (load(length, false)) {
405 break;
406 }
407 }
408 }
409 }
410 int length = fCurrentEntity.position - offset;
411 fCurrentEntity.columnNumber += length;
412
413 // return name
414 String symbol = null;
415 if (length > 0) {
416 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
417 }
418 if (DEBUG_BUFFER) {
419 System.out.print(")scanName: ");
420 XMLEntityManager.print(fCurrentEntity);
421 System.out.println(" -> "+String.valueOf(symbol));
422 }
423 return symbol;
424
425 } // scanName():String
426
427 /**
428 * Returns a string matching the NCName production appearing immediately
429 * on the input as a symbol, or null if no NCName string is present.
430 * <p>
431 * <strong>Note:</strong> The NCName characters are consumed.
432 * <p>
433 * <strong>Note:</strong> The string returned must be a symbol. The
434 * SymbolTable can be used for this purpose.
435 *
436 * @throws IOException Thrown if i/o error occurs.
437 * @throws EOFException Thrown on end of file.
438 *
439 * @see org.apache.xerces.util.SymbolTable
440 * @see org.apache.xerces.util.XMLChar#isNCName
441 * @see org.apache.xerces.util.XMLChar#isNCNameStart
442 */
443 public String scanNCName() throws IOException {
444 if (DEBUG_BUFFER) {
445 System.out.print("(scanNCName: ");
446 XMLEntityManager.print(fCurrentEntity);
447 System.out.println();
448 }
449
450 // load more characters, if needed
451 if (fCurrentEntity.position == fCurrentEntity.count) {
452 load(0, true);
453 }
454
455 // scan name
456 int offset = fCurrentEntity.position;
457 if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
458 if (++fCurrentEntity.position == fCurrentEntity.count) {
459 fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
460 offset = 0;
461 if (load(1, false)) {
462 fCurrentEntity.columnNumber++;
463 String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
464 if (DEBUG_BUFFER) {
465 System.out.print(")scanNCName: ");
466 XMLEntityManager.print(fCurrentEntity);
467 System.out.println(" -> "+String.valueOf(symbol));
468 }
469 return symbol;
470 }
471 }
472 while (XMLChar.isNCName(fCurrentEntity.ch[fCurrentEntity.position])) {
473 if (++fCurrentEntity.position == fCurrentEntity.count) {
474 int length = fCurrentEntity.position - offset;
475 if (length == fCurrentEntity.ch.length) {
476 // bad luck we have to resize our buffer
477 char[] tmp = new char[fCurrentEntity.ch.length << 1];
478 System.arraycopy(fCurrentEntity.ch, offset,
479 tmp, 0, length);
480 fCurrentEntity.ch = tmp;
481 }
482 else {
483 System.arraycopy(fCurrentEntity.ch, offset,
484 fCurrentEntity.ch, 0, length);
485 }
486 offset = 0;
487 if (load(length, false)) {
488 break;
489 }
490 }
491 }
492 }
493 int length = fCurrentEntity.position - offset;
494 fCurrentEntity.columnNumber += length;
495
496 // return name
497 String symbol = null;
498 if (length > 0) {
499 symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
500 }
501 if (DEBUG_BUFFER) {
502 System.out.print(")scanNCName: ");
503 XMLEntityManager.print(fCurrentEntity);
504 System.out.println(" -> "+String.valueOf(symbol));
505 }
506 return symbol;
507
508 } // scanNCName():String
509
/**
 * Scans a qualified name from the input, setting the fields of the
 * QName structure appropriately.
 * <p>
 * <strong>Note:</strong> The qualified name characters are consumed.
 * <p>
 * <strong>Note:</strong> The strings used to set the values of the
 * QName structure must be symbols. The SymbolTable can be used for
 * this purpose.
 * <p>
 * The first ':' encountered separates prefix from local part; a second
 * ':' ends the scan. When the character following the first ':' is not
 * an NCName start character, a fatal "IllegalQName" error is reported
 * through the error reporter.
 *
 * @param qname The qualified name structure to fill.
 *
 * @return Returns true if a qualified name appeared immediately on
 *         the input and was scanned, false otherwise.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 *
 * @see org.apache.xerces.util.SymbolTable
 * @see org.apache.xerces.util.XMLChar#isName
 * @see org.apache.xerces.util.XMLChar#isNCNameStart
 */
public boolean scanQName(QName qname) throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(scanQName, "+qname+": ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }

    // scan qualified name
    int offset = fCurrentEntity.position;
    if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
        if (++fCurrentEntity.position == fCurrentEntity.count) {
            // The start character was the last one buffered: keep it in
            // slot 0 and load more characters behind it.
            fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
            offset = 0;
            if (load(1, false)) {
                // load reported true (presumably: the entity has no more
                // data; load is defined outside this view), so the name
                // is that single character and has no prefix.
                fCurrentEntity.columnNumber++;
                String name =
                    fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
                qname.setValues(null, name, name, null);
                if (DEBUG_BUFFER) {
                    System.out.print(")scanQName, "+qname+": ");
                    XMLEntityManager.print(fCurrentEntity);
                    System.out.println(" -> true");
                }
                return true;
            }
        }
        // buffer index of the first ':' seen, or -1 if none yet
        int index = -1;
        while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            char c = fCurrentEntity.ch[fCurrentEntity.position];

            if (c == ':') {
                // a second colon is not part of this name: stop before it
                if (index != -1) {
                    break;
                }
                index = fCurrentEntity.position;
            }
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                // Buffer exhausted mid-name: move the scanned part to the
                // front of the buffer before loading more.
                int length = fCurrentEntity.position - offset;
                if (length == fCurrentEntity.ch.length) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.ch.length << 1];
                    System.arraycopy(fCurrentEntity.ch, offset,
                                     tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                }
                else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                                     fCurrentEntity.ch, 0, length);
                }
                // the colon moved with the data, so rebase its index
                if (index != -1) {
                    index = index - offset;
                }
                offset = 0;
                if (load(length, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;
        if (length > 0) {
            String prefix = null;
            String localpart = null;
            String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
                                                    offset, length);
            if (index != -1) {
                // split "prefix:local" around the recorded colon
                int prefixLength = index - offset;
                prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
                                                offset, prefixLength);
                int len = length - prefixLength - 1;
                int startLocal = index +1;
                // the local part must itself begin with an NCName start
                // character; scanning continues after the report
                if (!XMLChar.isNCNameStart(fCurrentEntity.ch[startLocal])){
                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                               "IllegalQName",
                                               null,
                                               XMLErrorReporter.SEVERITY_FATAL_ERROR);
                }
                localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
                                                   startLocal, len);

            }
            else {
                // no colon: the local part is the whole raw name
                localpart = rawname;
            }
            qname.setValues(prefix, localpart, rawname, null);
            if (DEBUG_BUFFER) {
                System.out.print(")scanQName, "+qname+": ");
                XMLEntityManager.print(fCurrentEntity);
                System.out.println(" -> true");
            }
            return true;
        }
    }

    // no qualified name found
    if (DEBUG_BUFFER) {
        System.out.print(")scanQName, "+qname+": ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> false");
    }
    return false;

} // scanQName(QName):boolean
640
/**
 * Scans a range of parsed character data, setting the fields of the
 * XMLString structure, appropriately.
 * <p>
 * <strong>Note:</strong> The characters are consumed.
 * <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of parsed character data. This method may return
 * before markup due to reaching the end of the input buffer or any
 * other reason.
 * <p>
 * <strong>Note:</strong> The fields contained in the XMLString
 * structure are not guaranteed to remain valid upon subsequent calls
 * to the entity scanner. Therefore, the caller is responsible for
 * immediately using the returned character data or making a copy of
 * the character data.
 * <p>
 * <strong>Note:</strong> The returned range points into the entity's
 * own buffer; leading newline characters are rewritten in that buffer
 * as line feeds.
 *
 * @param content The content structure to fill.
 *
 * @return Returns the next character on the input, if known. This
 *         value may be -1 but this does <em>not</em> designate
 *         end of file.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public int scanContent(XMLString content) throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(scanContent: ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        // Exactly one character is left: move it to slot 0, load behind
        // it, and restart scanning from the front of the buffer.
        fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
        load(1, false);
        fCurrentEntity.position = 0;
        fCurrentEntity.startPosition = 0;
    }

    // normalize newlines
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    // subtracted from the scanned length when the column is updated below
    int newlines = 0;
    boolean external = fCurrentEntity.isExternal();
    if (c == '\n' || (c == '\r' && external)) {
        if (DEBUG_BUFFER) {
            System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
        // consume the leading run of newline characters
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && external) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // Buffer exhausted inside the newline run: account
                    // for the consumed characters, then load new data
                    // behind the first `newlines` slots.
                    offset = 0;
                    fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                    fCurrentEntity.position = newlines;
                    fCurrentEntity.startPosition = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    // CR LF pair: skip the LF and advance the start of
                    // the returned range, so the pair contributes a
                    // single character to it.
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    // NOTE(review): for a CR not followed by LF this
                    // increments newlines a second time (it was already
                    // incremented above) - confirm this is intended.
                    newlines++;
                }
            }
            else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // same buffer-exhausted handling as in the CR branch
                    offset = 0;
                    fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                    fCurrentEntity.position = newlines;
                    fCurrentEntity.startPosition = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
            }
            else {
                // not a newline: un-consume it and end the run
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // rewrite every character of the run as a line feed
        for (int i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
        }
        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Only one character is left after the run: return just the
            // newlines and report the next character as unknown.
            content.setValues(fCurrentEntity.ch, offset, length);
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                XMLEntityManager.print(fCurrentEntity);
                System.out.println();
            }
            return -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
    }

    // inner loop, scanning for content
    while (fCurrentEntity.position < fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (!XMLChar.isContent(c)) {
            // leave the non-content character for the caller
            fCurrentEntity.position--;
            break;
        }
    }
    int length = fCurrentEntity.position - offset;
    // only the non-newline part of the range advances the column
    fCurrentEntity.columnNumber += length - newlines;
    content.setValues(fCurrentEntity.ch, offset, length);

    // return next character
    if (fCurrentEntity.position != fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position];
        // REVISIT: Does this need to be updated to fix the
        //          #x0D ^#x0A newline normalization problem? -Ac
        if (c == '\r' && external) {
            c = '\n';
        }
    }
    else {
        // buffer end reached: the next character is not known yet
        c = -1;
    }
    if (DEBUG_BUFFER) {
        System.out.print(")scanContent: ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> '"+(char)c+"'");
    }
    return c;

} // scanContent(XMLString):int
791
/**
 * Scans a range of attribute value data, setting the fields of the
 * XMLString structure, appropriately.
 * <p>
 * <strong>Note:</strong> The characters are consumed.
 * <p>
 * <strong>Note:</strong> This method does not guarantee to return
 * the longest run of attribute value data. This method may return
 * before the quote character due to reaching the end of the input
 * buffer or any other reason.
 * <p>
 * <strong>Note:</strong> The fields contained in the XMLString
 * structure are not guaranteed to remain valid upon subsequent calls
 * to the entity scanner. Therefore, the caller is responsible for
 * immediately using the returned character data or making a copy of
 * the character data.
 * <p>
 * <strong>Note:</strong> The returned range points into the entity's
 * own buffer; leading newline characters are rewritten in that buffer
 * as line feeds.
 *
 * @param quote   The quote character that signifies the end of the
 *                attribute value data.
 * @param content The content structure to fill.
 *
 * @return Returns the next character on the input, if known. This
 *         value may be -1 but this does <em>not</em> designate
 *         end of file.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file.
 */
public int scanLiteral(int quote, XMLString content)
    throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(scanLiteral, '"+(char)quote+"': ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        // Exactly one character is left: move it to slot 0, load behind
        // it, and restart scanning from the front of the buffer.
        fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
        load(1, false);
        fCurrentEntity.position = 0;
        fCurrentEntity.startPosition = 0;
    }

    // normalize newlines
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    // subtracted from the scanned length when the column is updated below
    int newlines = 0;
    boolean external = fCurrentEntity.isExternal();
    if (c == '\n' || (c == '\r' && external)) {
        if (DEBUG_BUFFER) {
            System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
        // consume the leading run of newline characters
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && external) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // Buffer exhausted inside the newline run: account
                    // for the consumed characters, then load new data
                    // behind the first `newlines` slots.
                    offset = 0;
                    fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                    fCurrentEntity.position = newlines;
                    fCurrentEntity.startPosition = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    // CR LF pair: skip the LF and advance the start of
                    // the returned range, so the pair contributes a
                    // single character to it.
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    // NOTE(review): for a CR not followed by LF this
                    // increments newlines a second time (it was already
                    // incremented above) - confirm this is intended.
                    newlines++;
                }
                /***/
            }
            else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // same buffer-exhausted handling as in the CR branch
                    offset = 0;
                    fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                    fCurrentEntity.position = newlines;
                    fCurrentEntity.startPosition = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
            }
            else {
                // not a newline: un-consume it and end the run
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // rewrite every character of the run as a line feed
        for (int i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
        }
        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Only one character is left after the run: return just the
            // newlines and report the next character as unknown.
            content.setValues(fCurrentEntity.ch, offset, length);
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                XMLEntityManager.print(fCurrentEntity);
                System.out.println();
            }
            return -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
            XMLEntityManager.print(fCurrentEntity);
            System.out.println();
        }
    }

    // scan literal value
    while (fCurrentEntity.position < fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        // Stop at '%', at any non-content character, and at the quote;
        // the quote is skipped over only when the entity is flagged
        // literal and is not external.
        if ((c == quote &&
             (!fCurrentEntity.literal || external))
            || c == '%' || !XMLChar.isContent(c)) {
            // leave the terminating character for the caller
            fCurrentEntity.position--;
            break;
        }
    }
    int length = fCurrentEntity.position - offset;
    // only the non-newline part of the range advances the column
    fCurrentEntity.columnNumber += length - newlines;
    content.setValues(fCurrentEntity.ch, offset, length);

    // return next character
    if (fCurrentEntity.position != fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position];
        // NOTE: We don't want to accidentally signal the
        //       end of the literal if we're expanding an
        //       entity appearing in the literal. -Ac
        if (c == quote && fCurrentEntity.literal) {
            c = -1;
        }
    }
    else {
        // buffer end reached: the next character is not known yet
        c = -1;
    }
    if (DEBUG_BUFFER) {
        System.out.print(")scanLiteral, '"+(char)quote+"': ");
        XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> '"+(char)c+"'");
    }
    return c;

} // scanLiteral(int,XMLString):int
949
950 /**
951 * Scans a range of character data up to the specified delimiter,
952 * setting the fields of the XMLString structure, appropriately.
953 * <p>
954 * <strong>Note:</strong> The characters are consumed.
955 * <p>
956 * <strong>Note:</strong> This assumes that the internal buffer is
957 * at least the same size, or bigger, than the length of the delimiter
958 * and that the delimiter contains at least one character.
959 * <p>
960 * <strong>Note:</strong> This method does not guarantee to return
961 * the longest run of character data. This method may return before
962 * the delimiter due to reaching the end of the input buffer or any
963 * other reason.
964 * <p>
965 * <strong>Note:</strong> The fields contained in the XMLString
966 * structure are not guaranteed to remain valid upon subsequent calls
967 * to the entity scanner. Therefore, the caller is responsible for
968 * immediately using the returned character data or making a copy of
969 * the character data.
970 *
971 * @param delimiter The string that signifies the end of the character
972 * data to be scanned.
973 * @param buffer The XMLStringBuffer to fill.
974 *
975 * @return Returns true if there is more data to scan, false otherwise.
976 *
977 * @throws IOException Thrown if i/o error occurs.
978 * @throws EOFException Thrown on end of file.
979 */
980 public boolean scanData(String delimiter, XMLStringBuffer buffer)
981 throws IOException {
982
983 // REVISIT: This method does not need to use a string buffer.
984 // The change would avoid the array copies and increase
985 // performance. -Ac
986 //
987 // Currently, this method is called for scanning CDATA
988 // sections, comments, and processing instruction data.
989 // So if this code is updated to NOT buffer, the scanning
990 // code for comments and processing instructions will
991 // need to be updated to do its own buffering. The code
992 // for CDATA sections is safe as-is. -Ac
993
994 boolean found = false;
995 int delimLen = delimiter.length();
996 char charAt0 = delimiter.charAt(0);
997 boolean external = fCurrentEntity.isExternal();
998 if (DEBUG_BUFFER) {
999 System.out.print("(scanData: ");
1000 XMLEntityManager.print(fCurrentEntity);
1001 System.out.println();
1002 }
1003
1004 // load more characters, if needed
1005
1006 if (fCurrentEntity.position == fCurrentEntity.count) {
1007 load(0, true);
1008 }
1009
1010 boolean bNextEntity = false;
1011
1012 while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
1013 && (!bNextEntity))
1014 {
1015 System.arraycopy(fCurrentEntity.ch,
1016 fCurrentEntity.position,
1017 fCurrentEntity.ch,
1018 0,
1019 fCurrentEntity.count - fCurrentEntity.position);
1020
1021 bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
1022 fCurrentEntity.position = 0;
1023 fCurrentEntity.startPosition = 0;
1024 }
1025
1026 if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
1027 // something must be wrong with the input: e.g., file ends in an unterminated comment
1028 int length = fCurrentEntity.count - fCurrentEntity.position;
1029 buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
1030 fCurrentEntity.columnNumber += fCurrentEntity.count;
1031 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1032 fCurrentEntity.position = fCurrentEntity.count;
1033 fCurrentEntity.startPosition = fCurrentEntity.count;
1034 load(0,true);
1035 return false;
1036 }
1037
1038 // normalize newlines
1039 int offset = fCurrentEntity.position;
1040 int c = fCurrentEntity.ch[offset];
1041 int newlines = 0;
1042 if (c == '\n' || (c == '\r' && external)) {
1043 if (DEBUG_BUFFER) {
1044 System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1045 XMLEntityManager.print(fCurrentEntity);
1046 System.out.println();
1047 }
1048 do {
1049 c = fCurrentEntity.ch[fCurrentEntity.position++];
1050 if (c == '\r' && external) {
1051 newlines++;
1052 fCurrentEntity.lineNumber++;
1053 fCurrentEntity.columnNumber = 1;
1054 if (fCurrentEntity.position == fCurrentEntity.count) {
1055 offset = 0;
1056 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1057 fCurrentEntity.position = newlines;
1058 fCurrentEntity.startPosition = newlines;
1059 if (load(newlines, false)) {
1060 break;
1061 }
1062 }
1063 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1064 fCurrentEntity.position++;
1065 offset++;
1066 }
1067 /*** NEWLINE NORMALIZATION ***/
1068 else {
1069 newlines++;
1070 }
1071 }
1072 else if (c == '\n') {
1073 newlines++;
1074 fCurrentEntity.lineNumber++;
1075 fCurrentEntity.columnNumber = 1;
1076 if (fCurrentEntity.position == fCurrentEntity.count) {
1077 offset = 0;
1078 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1079 fCurrentEntity.position = newlines;
1080 fCurrentEntity.startPosition = newlines;
1081 fCurrentEntity.count = newlines;
1082 if (load(newlines, false)) {
1083 break;
1084 }
1085 }
1086 }
1087 else {
1088 fCurrentEntity.position--;
1089 break;
1090 }
1091 } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1092 for (int i = offset; i < fCurrentEntity.position; i++) {
1093 fCurrentEntity.ch[i] = '\n';
1094 }
1095 int length = fCurrentEntity.position - offset;
1096 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1097 buffer.append(fCurrentEntity.ch, offset, length);
1098 if (DEBUG_BUFFER) {
1099 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1100 XMLEntityManager.print(fCurrentEntity);
1101 System.out.println();
1102 }
1103 return true;
1104 }
1105 if (DEBUG_BUFFER) {
1106 System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1107 XMLEntityManager.print(fCurrentEntity);
1108 System.out.println();
1109 }
1110 }
1111
1112 // iterate over buffer looking for delimiter
1113 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1114 c = fCurrentEntity.ch[fCurrentEntity.position++];
1115 if (c == charAt0) {
1116 // looks like we just hit the delimiter
1117 int delimOffset = fCurrentEntity.position - 1;
1118 for (int i = 1; i < delimLen; i++) {
1119 if (fCurrentEntity.position == fCurrentEntity.count) {
1120 fCurrentEntity.position -= i;
1121 break OUTER;
1122 }
1123 c = fCurrentEntity.ch[fCurrentEntity.position++];
1124 if (delimiter.charAt(i) != c) {
1125 fCurrentEntity.position--;
1126 break;
1127 }
1128 }
1129 if (fCurrentEntity.position == delimOffset + delimLen) {
1130 found = true;
1131 break;
1132 }
1133 }
1134 else if (c == '\n' || (external && c == '\r')) {
1135 fCurrentEntity.position--;
1136 break;
1137 }
1138 else if (XMLChar.isInvalid(c)) {
1139 fCurrentEntity.position--;
1140 int length = fCurrentEntity.position - offset;
1141 fCurrentEntity.columnNumber += length - newlines;
1142 buffer.append(fCurrentEntity.ch, offset, length);
1143 return true;
1144 }
1145 }
1146 int length = fCurrentEntity.position - offset;
1147 fCurrentEntity.columnNumber += length - newlines;
1148 if (found) {
1149 length -= delimLen;
1150 }
1151 buffer.append (fCurrentEntity.ch, offset, length);
1152
1153 // return true if string was skipped
1154 if (DEBUG_BUFFER) {
1155 System.out.print(")scanData: ");
1156 XMLEntityManager.print(fCurrentEntity);
1157 System.out.println(" -> " + !found);
1158 }
1159 return !found;
1160
1161 } // scanData(String,XMLString):boolean
1162
1163 /**
1164 * Skips a character appearing immediately on the input.
1165 * <p>
1166 * <strong>Note:</strong> The character is consumed only if it matches
1167 * the specified character.
1168 *
1169 * @param c The character to skip.
1170 *
1171 * @return Returns true if the character was skipped.
1172 *
1173 * @throws IOException Thrown if i/o error occurs.
1174 * @throws EOFException Thrown on end of file.
1175 */
1176 public boolean skipChar(int c) throws IOException {
1177 if (DEBUG_BUFFER) {
1178 System.out.print("(skipChar, '"+(char)c+"': ");
1179 XMLEntityManager.print(fCurrentEntity);
1180 System.out.println();
1181 }
1182
1183 // load more characters, if needed
1184 if (fCurrentEntity.position == fCurrentEntity.count) {
1185 load(0, true);
1186 }
1187
1188 // skip character
1189 int cc = fCurrentEntity.ch[fCurrentEntity.position];
1190 if (cc == c) {
1191 fCurrentEntity.position++;
1192 if (c == '\n') {
1193 fCurrentEntity.lineNumber++;
1194 fCurrentEntity.columnNumber = 1;
1195 }
1196 else {
1197 fCurrentEntity.columnNumber++;
1198 }
1199 if (DEBUG_BUFFER) {
1200 System.out.print(")skipChar, '"+(char)c+"': ");
1201 XMLEntityManager.print(fCurrentEntity);
1202 System.out.println(" -> true");
1203 }
1204 return true;
1205 }
1206 else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
1207 // handle newlines
1208 if (fCurrentEntity.position == fCurrentEntity.count) {
1209 fCurrentEntity.ch[0] = (char)cc;
1210 load(1, false);
1211 }
1212 fCurrentEntity.position++;
1213 if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1214 fCurrentEntity.position++;
1215 }
1216 fCurrentEntity.lineNumber++;
1217 fCurrentEntity.columnNumber = 1;
1218 if (DEBUG_BUFFER) {
1219 System.out.print(")skipChar, '"+(char)c+"': ");
1220 XMLEntityManager.print(fCurrentEntity);
1221 System.out.println(" -> true");
1222 }
1223 return true;
1224 }
1225
1226 // character was not skipped
1227 if (DEBUG_BUFFER) {
1228 System.out.print(")skipChar, '"+(char)c+"': ");
1229 XMLEntityManager.print(fCurrentEntity);
1230 System.out.println(" -> false");
1231 }
1232 return false;
1233
1234 } // skipChar(int):boolean
1235
1236 /**
1237 * Skips space characters appearing immediately on the input.
1238 * <p>
1239 * <strong>Note:</strong> The characters are consumed only if they are
1240 * space characters.
1241 *
1242 * @return Returns true if at least one space character was skipped.
1243 *
1244 * @throws IOException Thrown if i/o error occurs.
1245 * @throws EOFException Thrown on end of file.
1246 *
1247 * @see org.apache.xerces.util.XMLChar#isSpace
1248 */
1249 public boolean skipSpaces() throws IOException {
1250 if (DEBUG_BUFFER) {
1251 System.out.print("(skipSpaces: ");
1252 XMLEntityManager.print(fCurrentEntity);
1253 System.out.println();
1254 }
1255
1256 // load more characters, if needed
1257 if (fCurrentEntity.position == fCurrentEntity.count) {
1258 load(0, true);
1259 }
1260
1261 // skip spaces
1262 int c = fCurrentEntity.ch[fCurrentEntity.position];
1263 if (XMLChar.isSpace(c)) {
1264 boolean external = fCurrentEntity.isExternal();
1265 do {
1266 boolean entityChanged = false;
1267 // handle newlines
1268 if (c == '\n' || (external && c == '\r')) {
1269 fCurrentEntity.lineNumber++;
1270 fCurrentEntity.columnNumber = 1;
1271 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1272 fCurrentEntity.ch[0] = (char)c;
1273 entityChanged = load(1, true);
1274 if (!entityChanged) {
1275 // the load change the position to be 1,
1276 // need to restore it when entity not changed
1277 fCurrentEntity.position = 0;
1278 fCurrentEntity.startPosition = 0;
1279 }
1280 }
1281 if (c == '\r' && external) {
1282 // REVISIT: Does this need to be updated to fix the
1283 // #x0D ^#x0A newline normalization problem? -Ac
1284 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1285 fCurrentEntity.position--;
1286 }
1287 }
1288 /*** NEWLINE NORMALIZATION ***
1289 else {
1290 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
1291 && external) {
1292 fCurrentEntity.position++;
1293 }
1294 }
1295 /***/
1296 }
1297 else {
1298 fCurrentEntity.columnNumber++;
1299 }
1300 // load more characters, if needed
1301 if (!entityChanged)
1302 fCurrentEntity.position++;
1303 if (fCurrentEntity.position == fCurrentEntity.count) {
1304 load(0, true);
1305 }
1306 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1307 if (DEBUG_BUFFER) {
1308 System.out.print(")skipSpaces: ");
1309 XMLEntityManager.print(fCurrentEntity);
1310 System.out.println(" -> true");
1311 }
1312 return true;
1313 }
1314
1315 // no spaces were found
1316 if (DEBUG_BUFFER) {
1317 System.out.print(")skipSpaces: ");
1318 XMLEntityManager.print(fCurrentEntity);
1319 System.out.println(" -> false");
1320 }
1321 return false;
1322
1323 } // skipSpaces():boolean
1324
1325 /**
1326 * Skips space characters appearing immediately on the input that would
1327 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
1328 * normalization is performed. This is useful when scanning structures
1329 * such as the XMLDecl and TextDecl that can only contain US-ASCII
1330 * characters.
1331 * <p>
1332 * <strong>Note:</strong> The characters are consumed only if they would
1333 * match non-terminal S before end of line normalization is performed.
1334 *
1335 * @return Returns true if at least one space character was skipped.
1336 *
1337 * @throws IOException Thrown if i/o error occurs.
1338 * @throws EOFException Thrown on end of file.
1339 *
1340 * @see org.apache.xerces.util.XMLChar#isSpace
1341 */
1342 public final boolean skipDeclSpaces() throws IOException {
1343 if (DEBUG_BUFFER) {
1344 System.out.print("(skipDeclSpaces: ");
1345 XMLEntityManager.print(fCurrentEntity);
1346 System.out.println();
1347 }
1348
1349 // load more characters, if needed
1350 if (fCurrentEntity.position == fCurrentEntity.count) {
1351 load(0, true);
1352 }
1353
1354 // skip spaces
1355 int c = fCurrentEntity.ch[fCurrentEntity.position];
1356 if (XMLChar.isSpace(c)) {
1357 boolean external = fCurrentEntity.isExternal();
1358 do {
1359 boolean entityChanged = false;
1360 // handle newlines
1361 if (c == '\n' || (external && c == '\r')) {
1362 fCurrentEntity.lineNumber++;
1363 fCurrentEntity.columnNumber = 1;
1364 if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1365 fCurrentEntity.ch[0] = (char)c;
1366 entityChanged = load(1, true);
1367 if (!entityChanged) {
1368 // the load change the position to be 1,
1369 // need to restore it when entity not changed
1370 fCurrentEntity.position = 0;
1371 fCurrentEntity.startPosition = 0;
1372 }
1373 }
1374 if (c == '\r' && external) {
1375 // REVISIT: Does this need to be updated to fix the
1376 // #x0D ^#x0A newline normalization problem? -Ac
1377 if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1378 fCurrentEntity.position--;
1379 }
1380 }
1381 /*** NEWLINE NORMALIZATION ***
1382 else {
1383 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
1384 && external) {
1385 fCurrentEntity.position++;
1386 }
1387 }
1388 /***/
1389 }
1390 else {
1391 fCurrentEntity.columnNumber++;
1392 }
1393 // load more characters, if needed
1394 if (!entityChanged)
1395 fCurrentEntity.position++;
1396 if (fCurrentEntity.position == fCurrentEntity.count) {
1397 load(0, true);
1398 }
1399 } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1400 if (DEBUG_BUFFER) {
1401 System.out.print(")skipDeclSpaces: ");
1402 XMLEntityManager.print(fCurrentEntity);
1403 System.out.println(" -> true");
1404 }
1405 return true;
1406 }
1407
1408 // no spaces were found
1409 if (DEBUG_BUFFER) {
1410 System.out.print(")skipDeclSpaces: ");
1411 XMLEntityManager.print(fCurrentEntity);
1412 System.out.println(" -> false");
1413 }
1414 return false;
1415
1416 } // skipDeclSpaces():boolean
1417
1418 /**
1419 * Skips the specified string appearing immediately on the input.
1420 * <p>
1421 * <strong>Note:</strong> The characters are consumed only if the
1422 * entire string is matched.
1423 *
1424 * @param s The string to skip.
1425 *
1426 * @return Returns true if the string was skipped.
1427 *
1428 * @throws IOException Thrown if i/o error occurs.
1429 * @throws EOFException Thrown on end of file.
1430 */
1431 public boolean skipString(String s) throws IOException {
1432 if (DEBUG_BUFFER) {
1433 System.out.print("(skipString, \""+s+"\": ");
1434 XMLEntityManager.print(fCurrentEntity);
1435 System.out.println();
1436 }
1437
1438 // load more characters, if needed
1439 if (fCurrentEntity.position == fCurrentEntity.count) {
1440 load(0, true);
1441 }
1442
1443 // skip string
1444 final int length = s.length();
1445 for (int i = 0; i < length; i++) {
1446 char c = fCurrentEntity.ch[fCurrentEntity.position++];
1447 if (c != s.charAt(i)) {
1448 fCurrentEntity.position -= i + 1;
1449 if (DEBUG_BUFFER) {
1450 System.out.print(")skipString, \""+s+"\": ");
1451 XMLEntityManager.print(fCurrentEntity);
1452 System.out.println(" -> false");
1453 }
1454 return false;
1455 }
1456 if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
1457 System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
1458 // REVISIT: Can a string to be skipped cross an
1459 // entity boundary? -Ac
1460 if (load(i + 1, false)) {
1461 fCurrentEntity.startPosition -= i + 1;
1462 fCurrentEntity.position -= i + 1;
1463 if (DEBUG_BUFFER) {
1464 System.out.print(")skipString, \""+s+"\": ");
1465 XMLEntityManager.print(fCurrentEntity);
1466 System.out.println(" -> false");
1467 }
1468 return false;
1469 }
1470 }
1471 }
1472 if (DEBUG_BUFFER) {
1473 System.out.print(")skipString, \""+s+"\": ");
1474 XMLEntityManager.print(fCurrentEntity);
1475 System.out.println(" -> true");
1476 }
1477 fCurrentEntity.columnNumber += length;
1478 return true;
1479
1480 } // skipString(String):boolean
1481
1482 //
1483 // Locator methods
1484 //
1485
1486 /**
1487 * Return the public identifier for the current document event.
1488 * <p>
1489 * The return value is the public identifier of the document
1490 * entity or of the external parsed entity in which the markup
1491 * triggering the event appears.
1492 *
1493 * @return A string containing the public identifier, or
1494 * null if none is available.
1495 */
1496 public final String getPublicId() {
1497 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
1498 } // getPublicId():String
1499
1500 /**
1501 * Return the expanded system identifier for the current document event.
1502 * <p>
1503 * The return value is the expanded system identifier of the document
1504 * entity or of the external parsed entity in which the markup
1505 * triggering the event appears.
1506 * <p>
1507 * If the system identifier is a URL, the parser must resolve it
1508 * fully before passing it to the application.
1509 *
1510 * @return A string containing the expanded system identifier, or null
1511 * if none is available.
1512 */
1513 public final String getExpandedSystemId() {
1514 if (fCurrentEntity != null) {
1515 if (fCurrentEntity.entityLocation != null &&
1516 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) {
1517 return fCurrentEntity.entityLocation.getExpandedSystemId();
1518 }
1519 else {
1520 // get the current entity to return something appropriate:
1521 return fCurrentEntity.getExpandedSystemId();
1522 }
1523 }
1524 return null;
1525 } // getExpandedSystemId():String
1526
1527 /**
1528 * Return the literal system identifier for the current document event.
1529 * <p>
1530 * The return value is the literal system identifier of the document
1531 * entity or of the external parsed entity in which the markup
1532 * triggering the event appears.
1533 * <p>
1534 * @return A string containing the literal system identifier, or null
1535 * if none is available.
1536 */
1537 public final String getLiteralSystemId() {
1538 if (fCurrentEntity != null) {
1539 if (fCurrentEntity.entityLocation != null &&
1540 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) {
1541 return fCurrentEntity.entityLocation.getLiteralSystemId();
1542 }
1543 else {
1544 // get the current entity to do it:
1545 return fCurrentEntity.getLiteralSystemId();
1546 }
1547 }
1548 return null;
1549 } // getLiteralSystemId():String
1550
1551 /**
1552 * Returns the line number where the current document event ends.
1553 * <p>
1554 * <strong>Warning:</strong> The return value from the method
1555 * is intended only as an approximation for the sake of error
1556 * reporting; it is not intended to provide sufficient information
1557 * to edit the character content of the original XML document.
1558 * <p>
1559 * The return value is an approximation of the line number
1560 * in the document entity or external parsed entity where the
1561 * markup triggering the event appears.
1562 * <p>
1563 * If possible, the line position of the first character after the
1564 * text associated with the document event should be provided.
1565 * The first line in the document is line 1.
1566 *
1567 * @return The line number, or -1 if none is available.
1568 */
1569 public final int getLineNumber() {
1570 if (fCurrentEntity != null) {
1571 if (fCurrentEntity.isExternal()) {
1572 return fCurrentEntity.lineNumber;
1573 }
1574 else {
1575 // ask the current entity to return something appropriate:
1576 return fCurrentEntity.getLineNumber();
1577 }
1578 }
1579
1580 return -1;
1581
1582 } // getLineNumber():int
1583
1584 /**
1585 * Returns the column number where the current document event ends.
1586 * <p>
1587 * <strong>Warning:</strong> The return value from the method
1588 * is intended only as an approximation for the sake of error
1589 * reporting; it is not intended to provide sufficient information
1590 * to edit the character content of the original XML document.
1591 * <p>
1592 * The return value is an approximation of the column number
1593 * in the document entity or external parsed entity where the
1594 * markup triggering the event appears.
1595 * <p>
1596 * If possible, the line position of the first character after the
1597 * text associated with the document event should be provided.
1598 * The first column in each line is column 1.
1599 *
1600 * @return The column number, or -1 if none is available.
1601 */
1602 public final int getColumnNumber() {
1603 if (fCurrentEntity != null) {
1604 if (fCurrentEntity.isExternal()) {
1605 return fCurrentEntity.columnNumber;
1606 }
1607 else {
1608 // ask current entity to find appropriate column number
1609 return fCurrentEntity.getColumnNumber();
1610 }
1611 }
1612
1613 return -1;
1614 } // getColumnNumber():int
1615
1616 /**
1617 * Returns the character offset where the current document event ends.
1618 * <p>
1619 * <strong>Warning:</strong> The return value from the method
1620 * is intended only as an approximation for the sake of error
1621 * reporting; it is not intended to provide sufficient information
1622 * to edit the character content of the original XML document.
1623 * <p>
1624 * The return value is an approximation of the character offset
1625 * in the document entity or external parsed entity where the
1626 * markup triggering the event appears.
1627 * <p>
1628 * If possible, the character offset of the first character after the
1629 * text associated with the document event should be provided.
1630 *
1631 * @return The character offset, or -1 if none is available.
1632 */
1633 public final int getCharacterOffset() {
1634 if (fCurrentEntity != null) {
1635 if (fCurrentEntity.isExternal()) {
1636 return fCurrentEntity.baseCharOffset + (fCurrentEntity.position - fCurrentEntity.startPosition);
1637 }
1638 else {
1639 // ask current entity to find appropriate character offset
1640 return fCurrentEntity.getCharacterOffset();
1641 }
1642 }
1643
1644 return -1;
1645 } // getCharacterOffset():int
1646
1647 /**
1648 * Returns the encoding of the current entity.
1649 * Note that, for a given entity, this value can only be
1650 * considered final once the encoding declaration has been read (or once it
1651 * has been determined that there is no such declaration) since, no encoding
1652 * having been specified on the XMLInputSource, the parser
1653 * will make an initial "guess" which could be in error.
1654 */
1655 public final String getEncoding() {
1656 if (fCurrentEntity != null) {
1657 if (fCurrentEntity.isExternal()) {
1658 return fCurrentEntity.encoding;
1659 }
1660 else {
1661 // ask current entity to find appropriate encoding
1662 return fCurrentEntity.getEncoding();
1663 }
1664 }
1665 return null;
1666 } // getEncoding():String
1667
1668 /**
1669 * Returns the XML version of the current entity. This will normally be the
1670 * value from the XML or text declaration or defaulted by the parser. Note
1671 * that this value may be different than the version of the processing rules
1672 * applied to the current entity. For instance, an XML 1.1 document may refer to
1673 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
1674 * document. Also note that, for a given entity, this value can only be considered
1675 * final once the XML or text declaration has been read or once it has been
1676 * determined that there is no such declaration.
1677 */
1678 public final String getXMLVersion() {
1679 if (fCurrentEntity != null) {
1680 if (fCurrentEntity.isExternal()) {
1681 return fCurrentEntity.xmlVersion;
1682 }
1683 else {
1684 // ask current entity to find the appropriate XML version
1685 return fCurrentEntity.getXMLVersion();
1686 }
1687 }
1688 return null;
1689 } // getXMLVersion():String
1690
1691 // allow entity manager to tell us what the current entity is:
1692 public final void setCurrentEntity(XMLEntityManager.ScannedEntity ent) {
1693 fCurrentEntity = ent;
1694 }
1695
1696 // set buffer size:
1697 public final void setBufferSize(int size) {
1698 // REVISIT: Buffer size passed to entity scanner
1699 // was not being kept in synch with the actual size
1700 // of the buffers in each scanned entity. If any
1701 // of the buffers were actually resized, it was possible
1702 // that the parser would throw an ArrayIndexOutOfBoundsException
1703 // for documents which contained names which are longer than
1704 // the current buffer size. Conceivably the buffer size passed
1705 // to entity scanner could be used to determine a minimum size
1706 // for resizing, if doubling its size is smaller than this
1707 // minimum. -- mrglavas
1708 fBufferSize = size;
1709 }
1710
1711 // reset what little state we have...
1712 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
1713 XMLErrorReporter reporter) {
1714 fCurrentEntity = null;
1715 fSymbolTable = symbolTable;
1716 fEntityManager = entityManager;
1717 fErrorReporter = reporter;
1718 }
1719
1720 //
1721 // Private methods
1722 //
1723
1724 /**
1725 * Loads a chunk of text.
1726 *
1727 * @param offset The offset into the character buffer to
1728 * read the next batch of characters.
1729 * @param changeEntity True if the load should change entities
1730 * at the end of the entity, otherwise leave
1731 * the current entity in place and the entity
1732 * boundary will be signaled by the return
1733 * value.
1734 *
1735 * @return Returns true if the entity changed as a result of this
1736 * load operation.
1737 */
1738 final boolean load(int offset, boolean changeEntity)
1739 throws IOException {
1740 if (DEBUG_BUFFER) {
1741 System.out.print("(load, "+offset+": ");
1742 XMLEntityManager.print(fCurrentEntity);
1743 System.out.println();
1744 }
1745
1746 fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1747 // read characters
1748 int length = fCurrentEntity.mayReadChunks?
1749 (fCurrentEntity.ch.length - offset):
1750 (XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE);
1751 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length);
1752 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1753 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count);
1754
1755 // reset count and position
1756 boolean entityChanged = false;
1757 if (count != -1) {
1758 if (count != 0) {
1759 fCurrentEntity.count = count + offset;
1760 fCurrentEntity.position = offset;
1761 fCurrentEntity.startPosition = offset;
1762 }
1763 }
1764
1765 // end of this entity
1766 else {
1767 fCurrentEntity.count = offset;
1768 fCurrentEntity.position = offset;
1769 fCurrentEntity.startPosition = offset;
1770 entityChanged = true;
1771 if (changeEntity) {
1772 fEntityManager.endEntity();
1773 if (fCurrentEntity == null) {
1774 throw END_OF_DOCUMENT_ENTITY;
1775 }
1776 // handle the trailing edges
1777 if (fCurrentEntity.position == fCurrentEntity.count) {
1778 load(0, true);
1779 }
1780 }
1781 }
1782 if (DEBUG_BUFFER) {
1783 System.out.print(")load, "+offset+": ");
1784 XMLEntityManager.print(fCurrentEntity);
1785 System.out.println();
1786 }
1787
1788 return entityChanged;
1789
1790 } // load(int, boolean):boolean
1791
1792 } // class XMLEntityScanner
1793