1 /*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5 /*
6 * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21
22 // Sep 14, 2000:
23 // Fixed comments to preserve whitespaces and add a line break
24 // when indenting. Reported by Gervase Markham <gerv@gerv.net>
25 // Sep 14, 2000:
26 // Fixed serializer to report IO exception directly, instead at
27 // the end of document processing.
28 // Reported by Patrick Higgins <phiggins@transzap.com>
29 // Sep 13, 2000:
// CR in character data will print as &#0D;
31 // Aug 25, 2000:
32 // Fixed processing instruction printing inside element content
33 // to not escape content. Reported by Mikael Staldal
34 // <d96-mst@d.kth.se>
35 // Aug 25, 2000:
36 // Added ability to omit comments.
37 // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
38 // Aug 26, 2000:
39 // Fixed bug in newline handling when preserving spaces.
40 // Contributed by Mike Dusseault <mdusseault@home.com>
41 // Aug 29, 2000:
42 // Fixed state.unescaped not being set to false when
43 // entering element state.
44 // Reported by Lowell Vaughn <lvaughn@agillion.com>
45
46
47 package com.sun.org.apache.xml.internal.serialize;
48
49
50 import java.io.IOException;
51 import java.io.OutputStream;
52 import java.io.Writer;
53 import java.util.Hashtable;
54 import java.util.Vector;
55
56 import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl;
57 import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl;
58 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
59 import com.sun.org.apache.xerces.internal.util.XMLChar;
60 import org.w3c.dom.DOMImplementation;
61 import org.w3c.dom.Document;
62 import org.w3c.dom.DocumentFragment;
63 import org.w3c.dom.DocumentType;
64 import org.w3c.dom.DOMError;
65 import org.w3c.dom.DOMErrorHandler;
66 import org.w3c.dom.Element;
67 import org.w3c.dom.Entity;
68 import org.w3c.dom.NamedNodeMap;
69 import org.w3c.dom.Node;
70 import org.w3c.dom.Notation;
71 import org.w3c.dom.ls.LSException;
72 import org.w3c.dom.ls.LSSerializerFilter;
73 import org.w3c.dom.traversal.NodeFilter;
74 import org.xml.sax.ContentHandler;
75 import org.xml.sax.DTDHandler;
76 import org.xml.sax.DocumentHandler;
77 import org.xml.sax.Locator;
78 import org.xml.sax.SAXException;
79 import org.xml.sax.ext.DeclHandler;
80 import org.xml.sax.ext.LexicalHandler;
81
82 /**
83 * Base class for a serializer supporting both DOM and SAX pretty
 * serializing of XML/HTML/XHTML documents. Derived classes perform
85 * the method-specific serializing, this class provides the common
86 * serializing mechanisms.
87 * <p>
88 * The serializer must be initialized with the proper writer and
89 * output format before it can be used by calling {@link #setOutputCharStream}
90 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
91 * for the output format.
92 * <p>
93 * The serializer can be reused any number of times, but cannot
94 * be used concurrently by two threads.
95 * <p>
96 * If an output stream is used, the encoding is taken from the
97 * output format (defaults to <tt>UTF-8</tt>). If a writer is
98 * used, make sure the writer uses the same encoding (if applies)
99 * as specified in the output format.
100 * <p>
101 * The serializer supports both DOM and SAX. DOM serializing is done
102 * by calling {@link #serialize(Document)} and SAX serializing is done by firing
103 * SAX events and using the serializer as a document handler.
104 * This also applies to derived class.
105 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
110 * <p>
111 * For elements that are not specified as whitespace preserving,
112 * the serializer will potentially break long text lines at space
113 * boundaries, indent lines, and serialize elements on separate
114 * lines. Line terminators will be regarded as spaces, and
115 * spaces at beginning of line will be stripped.
116 * <p>
 * When indenting, the serializer is capable of detecting what appears
 * to be element content, and serializing these elements indented on
 * separate lines. An element is serialized indented when it is the
 * first or last child of an element, or immediately following or
 * preceding another element.
122 *
123 *
124 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
125 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
126 * @author Elena Litani, IBM
127 * @author Sunitha Reddy, Sun Microsystems
128 * @see Serializer
129 * @see LSSerializer
130 */
131 public abstract class BaseMarkupSerializer
132 implements ContentHandler, DocumentHandler, LexicalHandler,
133 DTDHandler, DeclHandler, DOMSerializer, Serializer
134 {
135
136 // DOM L3 implementation
137 protected short features = 0xFFFFFFFF;
138 protected DOMErrorHandler fDOMErrorHandler;
139 protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
140 protected LSSerializerFilter fDOMFilter;
141
142 protected EncodingInfo _encodingInfo;
143
144
145 /**
146 * Holds array of all element states that have been entered.
147 * The array is automatically resized. When leaving an element,
 * its state is not removed but reused when later returning
149 * to the same nesting level.
150 */
151 private ElementState[] _elementStates;
152
153
154 /**
155 * The index of the next state to place in the array,
156 * or one plus the index of the current state. When zero,
157 * we are in no state.
158 */
159 private int _elementStateCount;
160
161
162 /**
 * Vector holding comments and PIs that appear outside the root
 * element (before or after it), see {@link #serializePreRoot}.
165 */
166 private Vector _preRoot;
167
168
169 /**
170 * If the document has been started (header serialized), this
171 * flag is set to true so it's not started twice.
172 */
173 protected boolean _started;
174
175
176 /**
177 * True if the serializer has been prepared. This flag is set
178 * to false when the serializer is reset prior to using it,
179 * and to true after it has been prepared for usage.
180 */
181 private boolean _prepared;
182
183
184 /**
185 * Association between namespace URIs (keys) and prefixes (values).
186 * Accumulated here prior to starting an element and placing this
187 * list in the element state.
188 */
189 protected Hashtable _prefixes;
190
191
192 /**
 * The public identifier of the document type, if known.
194 */
195 protected String _docTypePublicId;
196
197
198 /**
199 * The system identifier of the document type, if known.
200 */
201 protected String _docTypeSystemId;
202
203
204 /**
205 * The output format associated with this serializer. This will never
206 * be a null reference. If no format was passed to the constructor,
207 * the default one for this document type will be used. The format
208 * object is never changed by the serializer.
209 */
210 protected OutputFormat _format;
211
212
213 /**
214 * The printer used for printing text parts.
215 */
216 protected Printer _printer;
217
218
219 /**
220 * True if indenting printer.
221 */
222 protected boolean _indenting;
223
224 /** Temporary buffer to store character data */
225 protected final StringBuffer fStrBuffer = new StringBuffer(40);
226
227 /**
228 * The underlying writer.
229 */
230 private Writer _writer;
231
232
233 /**
234 * The output stream.
235 */
236 private OutputStream _output;
237
238 /** Current node that is being processed */
239 protected Node fCurrentNode = null;
240
241
242
243 //--------------------------------//
244 // Constructor and initialization //
245 //--------------------------------//
246
247
248 /**
249 * Protected constructor can only be used by derived class.
250 * Must initialize the serializer before serializing any document,
251 * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
252 * first
253 */
254 protected BaseMarkupSerializer( OutputFormat format )
255 {
256 int i;
257
258 _elementStates = new ElementState[ 10 ];
259 for ( i = 0 ; i < _elementStates.length ; ++i )
260 _elementStates[ i ] = new ElementState();
261 _format = format;
262 }
263
264
265 public DocumentHandler asDocumentHandler()
266 throws IOException
267 {
268 prepare();
269 return this;
270 }
271
272
273 public ContentHandler asContentHandler()
274 throws IOException
275 {
276 prepare();
277 return this;
278 }
279
280
281 public DOMSerializer asDOMSerializer()
282 throws IOException
283 {
284 prepare();
285 return this;
286 }
287
288
289 public void setOutputByteStream( OutputStream output )
290 {
291 if ( output == null ) {
292 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
293 "ArgumentIsNull", new Object[]{"output"});
294 throw new NullPointerException(msg);
295 }
296 _output = output;
297 _writer = null;
298 reset();
299 }
300
301
302 public void setOutputCharStream( Writer writer )
303 {
304 if ( writer == null ) {
305 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
306 "ArgumentIsNull", new Object[]{"writer"});
307 throw new NullPointerException(msg);
308 }
309 _writer = writer;
310 _output = null;
311 reset();
312 }
313
314
315 public void setOutputFormat( OutputFormat format )
316 {
317 if ( format == null ) {
318 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
319 "ArgumentIsNull", new Object[]{"format"});
320 throw new NullPointerException(msg);
321 }
322 _format = format;
323 reset();
324 }
325
326
327 public boolean reset()
328 {
329 if ( _elementStateCount > 1 ) {
330 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
331 "ResetInMiddle", null);
332 throw new IllegalStateException(msg);
333 }
334 _prepared = false;
335 fCurrentNode = null;
336 fStrBuffer.setLength(0);
337 return true;
338 }
339
340
341 protected void prepare()
342 throws IOException
343 {
344 if ( _prepared )
345 return;
346
347 if ( _writer == null && _output == null ) {
348 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
349 "NoWriterSupplied", null);
350 throw new IOException(msg);
351 }
352 // If the output stream has been set, use it to construct
353 // the writer. It is possible that the serializer has been
354 // reused with the same output stream and different encoding.
355
356 _encodingInfo = _format.getEncodingInfo();
357
358 if ( _output != null ) {
359 _writer = _encodingInfo.getWriter(_output);
360 }
361
362 if ( _format.getIndenting() ) {
363 _indenting = true;
364 _printer = new IndentPrinter( _writer, _format );
365 } else {
366 _indenting = false;
367 _printer = new Printer( _writer, _format );
368 }
369
370 ElementState state;
371
372 _elementStateCount = 0;
373 state = _elementStates[ 0 ];
374 state.namespaceURI = null;
375 state.localName = null;
376 state.rawName = null;
377 state.preserveSpace = _format.getPreserveSpace();
378 state.empty = true;
379 state.afterElement = false;
380 state.afterComment = false;
381 state.doCData = state.inCData = false;
382 state.prefixes = null;
383
384 _docTypePublicId = _format.getDoctypePublic();
385 _docTypeSystemId = _format.getDoctypeSystem();
386 _started = false;
387 _prepared = true;
388 }
389
390
391
392 //----------------------------------//
393 // DOM document serializing methods //
394 //----------------------------------//
395
396
397 /**
398 * Serializes the DOM element using the previously specified
399 * writer and output format. Throws an exception only if
400 * an I/O exception occured while serializing.
401 *
402 * @param elem The element to serialize
403 * @throws IOException An I/O exception occured while
404 * serializing
405 */
406 public void serialize( Element elem )
407 throws IOException
408 {
409 reset();
410 prepare();
411 serializeNode( elem );
412 _printer.flush();
413 if ( _printer.getException() != null )
414 throw _printer.getException();
415 }
416
417 /**
418 * Serializes a node using the previously specified
419 * writer and output format. Throws an exception only if
420 * an I/O exception occured while serializing.
421 *
422 * @param node Node to serialize
423 * @throws IOException An I/O exception occured while serializing
424 */
425 public void serialize( Node node ) throws IOException {
426 reset();
427 prepare();
428 serializeNode( node );
429 //Print any PIs and Comments which appeared in 'node'
430 serializePreRoot();
431 _printer.flush();
432 if ( _printer.getException() != null )
433 throw _printer.getException();
434 }
435
436 /**
437 * Serializes the DOM document fragmnt using the previously specified
438 * writer and output format. Throws an exception only if
439 * an I/O exception occured while serializing.
440 *
441 * @param elem The element to serialize
442 * @throws IOException An I/O exception occured while
443 * serializing
444 */
445 public void serialize( DocumentFragment frag )
446 throws IOException
447 {
448 reset();
449 prepare();
450 serializeNode( frag );
451 _printer.flush();
452 if ( _printer.getException() != null )
453 throw _printer.getException();
454 }
455
456
457 /**
458 * Serializes the DOM document using the previously specified
459 * writer and output format. Throws an exception only if
460 * an I/O exception occured while serializing.
461 *
462 * @param doc The document to serialize
463 * @throws IOException An I/O exception occured while
464 * serializing
465 */
466 public void serialize( Document doc )
467 throws IOException
468 {
469 reset();
470 prepare();
471 serializeNode( doc );
472 serializePreRoot();
473 _printer.flush();
474 if ( _printer.getException() != null )
475 throw _printer.getException();
476 }
477
478
479 //------------------------------------------//
480 // SAX document handler serializing methods //
481 //------------------------------------------//
482
483
484 public void startDocument()
485 throws SAXException
486 {
487 try {
488 prepare();
489 } catch ( IOException except ) {
490 throw new SAXException( except.toString() );
491 }
492 // Nothing to do here. All the magic happens in startDocument(String)
493 }
494
495
496 public void characters( char[] chars, int start, int length )
497 throws SAXException
498 {
499 ElementState state;
500
501 try {
502 state = content();
503
504 // Check if text should be print as CDATA section or unescaped
505 // based on elements listed in the output format (the element
506 // state) or whether we are inside a CDATA section or entity.
507
508 if ( state.inCData || state.doCData ) {
509 int saveIndent;
510
511 // Print a CDATA section. The text is not escaped, but ']]>'
512 // appearing in the code must be identified and dealt with.
513 // The contents of a text node is considered space preserving.
514 if ( ! state.inCData ) {
515 _printer.printText( "<![CDATA[" );
516 state.inCData = true;
517 }
518 saveIndent = _printer.getNextIndent();
519 _printer.setNextIndent( 0 );
520 char ch;
521 final int end = start + length;
522 for ( int index = start ; index < end; ++index ) {
523 ch = chars[index];
524 if ( ch == ']' && index + 2 < end &&
525 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
526 _printer.printText("]]]]><![CDATA[>");
527 index +=2;
528 continue;
529 }
530 if (!XMLChar.isValid(ch)) {
531 // check if it is surrogate
532 if (++index < end) {
533 surrogates(ch, chars[index]);
534 }
535 else {
536 fatalError("The character '"+(char)ch+"' is an invalid XML character");
537 }
538 continue;
539 } else {
540 if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
541 ch == '\n' || ch == '\r' || ch == '\t' ) {
542 _printer.printText((char)ch);
543 } else {
544 // The character is not printable -- split CDATA section
545 _printer.printText("]]>&#x");
546 _printer.printText(Integer.toHexString(ch));
547 _printer.printText(";<![CDATA[");
548 }
549 }
550 }
551 _printer.setNextIndent( saveIndent );
552
553 } else {
554
555 int saveIndent;
556
557 if ( state.preserveSpace ) {
558 // If preserving space then hold of indentation so no
559 // excessive spaces are printed at line breaks, escape
560 // the text content without replacing spaces and print
561 // the text breaking only at line breaks.
562 saveIndent = _printer.getNextIndent();
563 _printer.setNextIndent( 0 );
564 printText( chars, start, length, true, state.unescaped );
565 _printer.setNextIndent( saveIndent );
566 } else {
567 printText( chars, start, length, false, state.unescaped );
568 }
569 }
570 } catch ( IOException except ) {
571 throw new SAXException( except );
572 }
573 }
574
575
576 public void ignorableWhitespace( char[] chars, int start, int length )
577 throws SAXException
578 {
579 int i;
580
581 try {
582 content();
583
584 // Print ignorable whitespaces only when indenting, after
585 // all they are indentation. Cancel the indentation to
586 // not indent twice.
587 if ( _indenting ) {
588 _printer.setThisIndent( 0 );
589 for ( i = start ; length-- > 0 ; ++i )
590 _printer.printText( chars[ i ] );
591 }
592 } catch ( IOException except ) {
593 throw new SAXException( except );
594 }
595 }
596
597
598 public final void processingInstruction( String target, String code )
599 throws SAXException
600 {
601 try {
602 processingInstructionIO( target, code );
603 } catch ( IOException except ) {
604 throw new SAXException( except );
605 }
606 }
607
608 public void processingInstructionIO( String target, String code )
609 throws IOException
610 {
611 int index;
612 ElementState state;
613
614 state = content();
615
616 // Create the processing instruction textual representation.
617 // Make sure we don't have '?>' inside either target or code.
618 index = target.indexOf( "?>" );
619 if ( index >= 0 )
620 fStrBuffer.append( "<?" ).append( target.substring( 0, index ) );
621 else
622 fStrBuffer.append( "<?" ).append( target );
623 if ( code != null ) {
624 fStrBuffer.append( ' ' );
625 index = code.indexOf( "?>" );
626 if ( index >= 0 )
627 fStrBuffer.append( code.substring( 0, index ) );
628 else
629 fStrBuffer.append( code );
630 }
631 fStrBuffer.append( "?>" );
632
633 // If before the root element (or after it), do not print
634 // the PI directly but place it in the pre-root vector.
635 if ( isDocumentState() ) {
636 if ( _preRoot == null )
637 _preRoot = new Vector();
638 _preRoot.addElement( fStrBuffer.toString() );
639 } else {
640 _printer.indent();
641 printText( fStrBuffer.toString(), true, true );
642 _printer.unindent();
643 if ( _indenting )
644 state.afterElement = true;
645 }
646
647 fStrBuffer.setLength(0);
648 }
649
650
651 public void comment( char[] chars, int start, int length )
652 throws SAXException
653 {
654 try {
655 comment( new String( chars, start, length ) );
656 } catch ( IOException except ) {
657 throw new SAXException( except );
658 }
659 }
660
661
662 public void comment( String text )
663 throws IOException
664 {
665 int index;
666 ElementState state;
667
668 if ( _format.getOmitComments() )
669 return;
670
671 state = content();
672 // Create the processing comment textual representation.
673 // Make sure we don't have '-->' inside the comment.
674 index = text.indexOf( "-->" );
675 if ( index >= 0 )
676 fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
677 else
678 fStrBuffer.append( "<!--" ).append( text ).append( "-->" );
679
680 // If before the root element (or after it), do not print
681 // the comment directly but place it in the pre-root vector.
682 if ( isDocumentState() ) {
683 if ( _preRoot == null )
684 _preRoot = new Vector();
685 _preRoot.addElement( fStrBuffer.toString() );
686 } else {
687 // Indent this element on a new line if the first
688 // content of the parent element or immediately
689 // following an element.
690 if ( _indenting && ! state.preserveSpace)
691 _printer.breakLine();
692 _printer.indent();
693 printText( fStrBuffer.toString(), true, true );
694 _printer.unindent();
695 if ( _indenting )
696 state.afterElement = true;
697 }
698
699 fStrBuffer.setLength(0);
700 state.afterComment = true;
701 state.afterElement = false;
702 }
703
704
705 public void startCDATA()
706 {
707 ElementState state;
708
709 state = getElementState();
710 state.doCData = true;
711 }
712
713
714 public void endCDATA()
715 {
716 ElementState state;
717
718 state = getElementState();
719 state.doCData = false;
720 }
721
722
723 public void startNonEscaping()
724 {
725 ElementState state;
726
727 state = getElementState();
728 state.unescaped = true;
729 }
730
731
732 public void endNonEscaping()
733 {
734 ElementState state;
735
736 state = getElementState();
737 state.unescaped = false;
738 }
739
740
741 public void startPreserving()
742 {
743 ElementState state;
744
745 state = getElementState();
746 state.preserveSpace = true;
747 }
748
749
750 public void endPreserving()
751 {
752 ElementState state;
753
754 state = getElementState();
755 state.preserveSpace = false;
756 }
757
758
759 /**
760 * Called at the end of the document to wrap it up.
761 * Will flush the output stream and throw an exception
762 * if any I/O error occured while serializing.
763 *
764 * @throws SAXException An I/O exception occured during
765 * serializing
766 */
767 public void endDocument()
768 throws SAXException
769 {
770 try {
771 // Print all the elements accumulated outside of
772 // the root element.
773 serializePreRoot();
774 // Flush the output, this is necessary for fStrBuffered output.
775 _printer.flush();
776 } catch ( IOException except ) {
777 throw new SAXException( except );
778 }
779 }
780
781
/**
 * SAX lexical callback for the start of an entity. Intentionally
 * empty: this class writes nothing when an entity starts.
 *
 * @param name The entity name (unused)
 */
public void startEntity( String name )
{
    // No output is produced for entity boundaries.
}
786
787
/**
 * SAX lexical callback for the end of an entity. Intentionally
 * empty: this class writes nothing when an entity ends.
 *
 * @param name The entity name (unused)
 */
public void endEntity( String name )
{
    // No output is produced for entity boundaries.
}
792
793
/**
 * SAX callback supplying a document locator. The locator is ignored.
 *
 * @param locator The locator (unused)
 */
public void setDocumentLocator( Locator locator )
{
    // Nothing to do
}
798
799
800 //-----------------------------------------//
801 // SAX content handler serializing methods //
802 //-----------------------------------------//
803
804
805 public void skippedEntity ( String name )
806 throws SAXException
807 {
808 try {
809 endCDATA();
810 content();
811 _printer.printText( '&' );
812 _printer.printText( name );
813 _printer.printText( ';' );
814 } catch ( IOException except ) {
815 throw new SAXException( except );
816 }
817 }
818
819
820 public void startPrefixMapping( String prefix, String uri )
821 throws SAXException
822 {
823 if ( _prefixes == null )
824 _prefixes = new Hashtable();
825 _prefixes.put( uri, prefix == null ? "" : prefix );
826 }
827
828
/**
 * SAX callback ending a namespace prefix mapping. Intentionally
 * empty: nothing is done here when a mapping goes out of scope.
 *
 * @param prefix The prefix whose mapping ends (unused)
 * @throws SAXException Declared by the SAX interface
 */
public void endPrefixMapping( String prefix )
    throws SAXException
{
}
833
834
835 //------------------------------------------//
836 // SAX DTD/Decl handler serializing methods //
837 //------------------------------------------//
838
839
840 public final void startDTD( String name, String publicId, String systemId )
841 throws SAXException
842 {
843 try {
844 _printer.enterDTD();
845 _docTypePublicId = publicId;
846 _docTypeSystemId = systemId;
847
848 } catch ( IOException except ) {
849 throw new SAXException( except );
850 }
851 }
852
853
/**
 * SAX lexical callback for the end of the DTD. Intentionally empty.
 */
public void endDTD()
{
    // Nothing to do here, all the magic occurs in startDocument(String).
}
858
859
860 public void elementDecl( String name, String model )
861 throws SAXException
862 {
863 try {
864 _printer.enterDTD();
865 _printer.printText( "<!ELEMENT " );
866 _printer.printText( name );
867 _printer.printText( ' ' );
868 _printer.printText( model );
869 _printer.printText( '>' );
870 if ( _indenting )
871 _printer.breakLine();
872 } catch ( IOException except ) {
873 throw new SAXException( except );
874 }
875 }
876
877
878 public void attributeDecl( String eName, String aName, String type,
879 String valueDefault, String value )
880 throws SAXException
881 {
882 try {
883 _printer.enterDTD();
884 _printer.printText( "<!ATTLIST " );
885 _printer.printText( eName );
886 _printer.printText( ' ' );
887 _printer.printText( aName );
888 _printer.printText( ' ' );
889 _printer.printText( type );
890 if ( valueDefault != null ) {
891 _printer.printText( ' ' );
892 _printer.printText( valueDefault );
893 }
894 if ( value != null ) {
895 _printer.printText( " \"" );
896 printEscaped( value );
897 _printer.printText( '"' );
898 }
899 _printer.printText( '>' );
900 if ( _indenting )
901 _printer.breakLine();
902 } catch ( IOException except ) {
903 throw new SAXException( except );
904 }
905 }
906
907
908 public void internalEntityDecl( String name, String value )
909 throws SAXException
910 {
911 try {
912 _printer.enterDTD();
913 _printer.printText( "<!ENTITY " );
914 _printer.printText( name );
915 _printer.printText( " \"" );
916 printEscaped( value );
917 _printer.printText( "\">" );
918 if ( _indenting )
919 _printer.breakLine();
920 } catch ( IOException except ) {
921 throw new SAXException( except );
922 }
923 }
924
925
926 public void externalEntityDecl( String name, String publicId, String systemId )
927 throws SAXException
928 {
929 try {
930 _printer.enterDTD();
931 unparsedEntityDecl( name, publicId, systemId, null );
932 } catch ( IOException except ) {
933 throw new SAXException( except );
934 }
935 }
936
937
938 public void unparsedEntityDecl( String name, String publicId,
939 String systemId, String notationName )
940 throws SAXException
941 {
942 try {
943 _printer.enterDTD();
944 if ( publicId == null ) {
945 _printer.printText( "<!ENTITY " );
946 _printer.printText( name );
947 _printer.printText( " SYSTEM " );
948 printDoctypeURL( systemId );
949 } else {
950 _printer.printText( "<!ENTITY " );
951 _printer.printText( name );
952 _printer.printText( " PUBLIC " );
953 printDoctypeURL( publicId );
954 _printer.printText( ' ' );
955 printDoctypeURL( systemId );
956 }
957 if ( notationName != null ) {
958 _printer.printText( " NDATA " );
959 _printer.printText( notationName );
960 }
961 _printer.printText( '>' );
962 if ( _indenting )
963 _printer.breakLine();
964 } catch ( IOException except ) {
965 throw new SAXException( except );
966 }
967 }
968
969
970 public void notationDecl( String name, String publicId, String systemId )
971 throws SAXException
972 {
973 try {
974 _printer.enterDTD();
975 if ( publicId != null ) {
976 _printer.printText( "<!NOTATION " );
977 _printer.printText( name );
978 _printer.printText( " PUBLIC " );
979 printDoctypeURL( publicId );
980 if ( systemId != null ) {
981 _printer.printText( ' ' );
982 printDoctypeURL( systemId );
983 }
984 } else {
985 _printer.printText( "<!NOTATION " );
986 _printer.printText( name );
987 _printer.printText( " SYSTEM " );
988 printDoctypeURL( systemId );
989 }
990 _printer.printText( '>' );
991 if ( _indenting )
992 _printer.breakLine();
993 } catch ( IOException except ) {
994 throw new SAXException( except );
995 }
996 }
997
998
999 //------------------------------------------//
//     Generic node serializing methods     //
1001 //------------------------------------------//
1002
1003
1004 /**
1005 * Serialize the DOM node. This method is shared across XML, HTML and XHTML
1006 * serializers and the differences are masked out in a separate {@link
1007 * #serializeElement}.
1008 *
1009 * @param node The node to serialize
1010 * @see #serializeElement
1011 * @throws IOException An I/O exception occured while
1012 * serializing
1013 */
1014 protected void serializeNode( Node node )
1015 throws IOException
1016 {
1017 fCurrentNode = node;
1018
1019 // Based on the node type call the suitable SAX handler.
1020 // Only comments entities and documents which are not
1021 // handled by SAX are serialized directly.
1022 switch ( node.getNodeType() ) {
1023 case Node.TEXT_NODE : {
1024 String text;
1025
1026 text = node.getNodeValue();
1027 if ( text != null ) {
1028 if (fDOMFilter !=null &&
1029 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) {
1030 short code = fDOMFilter.acceptNode(node);
1031 switch (code) {
1032 case NodeFilter.FILTER_REJECT:
1033 case NodeFilter.FILTER_SKIP: {
1034 break;
1035 }
1036 default: {
1037 characters(text);
1038 }
1039 }
1040 }
1041 else if ( !_indenting || getElementState().preserveSpace
1042 || (text.replace('\n',' ').trim().length() != 0))
1043 characters( text );
1044
1045 }
1046 break;
1047 }
1048
1049 case Node.CDATA_SECTION_NODE : {
1050 String text = node.getNodeValue();
1051 if ((features & DOMSerializerImpl.CDATA) != 0) {
1052 if (text != null) {
1053 if (fDOMFilter != null
1054 && (fDOMFilter.getWhatToShow()
1055 & NodeFilter.SHOW_CDATA_SECTION)
1056 != 0) {
1057 short code = fDOMFilter.acceptNode(node);
1058 switch (code) {
1059 case NodeFilter.FILTER_REJECT :
1060 case NodeFilter.FILTER_SKIP :
1061 {
1062 // skip the CDATA node
1063 return;
1064 }
1065 default :
1066 {
1067 //fall through..
1068 }
1069 }
1070 }
1071 startCDATA();
1072 characters(text);
1073 endCDATA();
1074 }
1075 } else {
1076 // transform into a text node
1077 characters(text);
1078 }
1079 break;
1080 }
1081 case Node.COMMENT_NODE : {
1082 String text;
1083
1084 if ( ! _format.getOmitComments() ) {
1085 text = node.getNodeValue();
1086 if ( text != null ) {
1087
1088 if (fDOMFilter !=null &&
1089 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) {
1090 short code = fDOMFilter.acceptNode(node);
1091 switch (code) {
1092 case NodeFilter.FILTER_REJECT:
1093 case NodeFilter.FILTER_SKIP: {
1094 // skip the comment node
1095 return;
1096 }
1097 default: {
1098 // fall through
1099 }
1100 }
1101 }
1102 comment( text );
1103 }
1104 }
1105 break;
1106 }
1107
1108 case Node.ENTITY_REFERENCE_NODE : {
1109 Node child;
1110
1111 endCDATA();
1112 content();
1113
1114 if (((features & DOMSerializerImpl.ENTITIES) != 0)
1115 || (node.getFirstChild() == null)) {
1116 if (fDOMFilter !=null &&
1117 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
1118 short code = fDOMFilter.acceptNode(node);
1119 switch (code) {
1120 case NodeFilter.FILTER_REJECT:{
1121 return; // remove the node
1122 }
1123 case NodeFilter.FILTER_SKIP: {
1124 child = node.getFirstChild();
1125 while ( child != null ) {
1126 serializeNode( child );
1127 child = child.getNextSibling();
1128 }
1129 return;
1130 }
1131
1132 default: {
1133 // fall through
1134 }
1135 }
1136 }
1137 checkUnboundNamespacePrefixedNode(node);
1138
1139 _printer.printText("&");
1140 _printer.printText(node.getNodeName());
1141 _printer.printText(";");
1142 }
1143 else {
1144 child = node.getFirstChild();
1145 while ( child != null ) {
1146 serializeNode( child );
1147 child = child.getNextSibling();
1148 }
1149 }
1150
1151 break;
1152 }
1153
1154 case Node.PROCESSING_INSTRUCTION_NODE : {
1155
1156 if (fDOMFilter !=null &&
1157 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) {
1158 short code = fDOMFilter.acceptNode(node);
1159 switch (code) {
1160 case NodeFilter.FILTER_REJECT:
1161 case NodeFilter.FILTER_SKIP: {
1162 return; // skip this node
1163 }
1164 default: { // fall through
1165 }
1166 }
1167 }
1168 processingInstructionIO( node.getNodeName(), node.getNodeValue() );
1169 break;
1170 }
1171 case Node.ELEMENT_NODE : {
1172
1173 if (fDOMFilter !=null &&
1174 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) {
1175 short code = fDOMFilter.acceptNode(node);
1176 switch (code) {
1177 case NodeFilter.FILTER_REJECT: {
1178 return;
1179 }
1180 case NodeFilter.FILTER_SKIP: {
1181 Node child = node.getFirstChild();
1182 while ( child != null ) {
1183 serializeNode( child );
1184 child = child.getNextSibling();
1185 }
1186 return; // skip this node
1187 }
1188
1189 default: { // fall through
1190 }
1191 }
1192 }
1193 serializeElement( (Element) node );
1194 break;
1195 }
1196 case Node.DOCUMENT_NODE : {
1197 DocumentType docType;
1198 DOMImplementation domImpl;
1199 NamedNodeMap map;
1200 Entity entity;
1201 Notation notation;
1202 int i;
1203
1204 serializeDocument();
1205
1206 // If there is a document type, use the SAX events to
1207 // serialize it.
1208 docType = ( (Document) node ).getDoctype();
1209 if (docType != null) {
1210 // DOM Level 2 (or higher)
1211 domImpl = ( (Document) node ).getImplementation();
1212 try {
1213 String internal;
1214
1215 _printer.enterDTD();
1216 _docTypePublicId = docType.getPublicId();
1217 _docTypeSystemId = docType.getSystemId();
1218 internal = docType.getInternalSubset();
1219 if ( internal != null && internal.length() > 0 )
1220 _printer.printText( internal );
1221 endDTD();
1222 }
1223 // DOM Level 1 -- does implementation have methods?
1224 catch (NoSuchMethodError nsme) {
1225 Class docTypeClass = docType.getClass();
1226
1227 String docTypePublicId = null;
1228 String docTypeSystemId = null;
1229 try {
1230 java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", (Class[]) null);
1231 if (getPublicId.getReturnType().equals(String.class)) {
1232 docTypePublicId = (String)getPublicId.invoke(docType, (Object[]) null);
1233 }
1234 }
1235 catch (Exception e) {
1236 // ignore
1237 }
1238 try {
1239 java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", (Class[]) null);
1240 if (getSystemId.getReturnType().equals(String.class)) {
1241 docTypeSystemId = (String)getSystemId.invoke(docType, (Object[]) null);
1242 }
1243 }
1244 catch (Exception e) {
1245 // ignore
1246 }
1247 _printer.enterDTD();
1248 _docTypePublicId = docTypePublicId;
1249 _docTypeSystemId = docTypeSystemId;
1250 endDTD();
1251 }
1252
1253 serializeDTD(docType.getName());
1254
1255 }
1256 _started = true;
1257
1258 // !! Fall through
1259 }
1260 case Node.DOCUMENT_FRAGMENT_NODE : {
1261 Node child;
1262
1263 // By definition this will happen if the node is a document,
1264 // document fragment, etc. Just serialize its contents. It will
1265 // work well for other nodes that we do not know how to serialize.
1266 child = node.getFirstChild();
1267 while ( child != null ) {
1268 serializeNode( child );
1269 child = child.getNextSibling();
1270 }
1271 break;
1272 }
1273
1274 default:
1275 break;
1276 }
1277 }
1278
1279
1280 /* Serializes XML Declaration, according to 'xml-declaration' property.
1281 */
1282 protected void serializeDocument()throws IOException {
1283 int i;
1284
1285 String dtd = _printer.leaveDTD();
1286 if (! _started) {
1287
1288 if (! _format.getOmitXMLDeclaration()) {
1289 StringBuffer buffer;
1290
1291 // Serialize the document declaration appreaing at the head
1292 // of very XML document (unless asked not to).
1293 buffer = new StringBuffer( "<?xml version=\"" );
1294 if (_format.getVersion() != null)
1295 buffer.append( _format.getVersion() );
1296 else
1297 buffer.append( "1.0" );
1298 buffer.append( '"' );
1299 String format_encoding = _format.getEncoding();
1300 if (format_encoding != null) {
1301 buffer.append( " encoding=\"" );
1302 buffer.append( format_encoding );
1303 buffer.append( '"' );
1304 }
1305 if (_format.getStandalone() && _docTypeSystemId == null &&
1306 _docTypePublicId == null)
1307 buffer.append( " standalone=\"yes\"" );
1308 buffer.append( "?>" );
1309 _printer.printText( buffer );
1310 _printer.breakLine();
1311 }
1312 }
1313
1314 // Always serialize these, even if not te first root element.
1315 serializePreRoot();
1316
1317 }
1318
1319 /* Serializes DTD, if present.
1320 */
1321 protected void serializeDTD(String name) throws IOException{
1322
1323 String dtd = _printer.leaveDTD();
1324 if (! _format.getOmitDocumentType()) {
1325 if (_docTypeSystemId != null) {
1326 // System identifier must be specified to print DOCTYPE.
1327 // If public identifier is specified print 'PUBLIC
1328 // <public> <system>', if not, print 'SYSTEM <system>'.
1329 _printer.printText( "<!DOCTYPE " );
1330 _printer.printText( name );
1331 if (_docTypePublicId != null) {
1332 _printer.printText( " PUBLIC " );
1333 printDoctypeURL( _docTypePublicId );
1334 if (_indenting) {
1335 _printer.breakLine();
1336 for (int i = 0 ; i < 18 + name.length() ; ++i)
1337 _printer.printText( " " );
1338 } else
1339 _printer.printText( " " );
1340 printDoctypeURL( _docTypeSystemId );
1341 } else {
1342 _printer.printText( " SYSTEM " );
1343 printDoctypeURL( _docTypeSystemId );
1344 }
1345
1346 // If we accumulated any DTD contents while printing.
1347 // this would be the place to print it.
1348 if (dtd != null && dtd.length() > 0) {
1349 _printer.printText( " [" );
1350 printText( dtd, true, true );
1351 _printer.printText( ']' );
1352 }
1353
1354 _printer.printText( ">" );
1355 _printer.breakLine();
1356 } else if (dtd != null && dtd.length() > 0) {
1357 _printer.printText( "<!DOCTYPE " );
1358 _printer.printText( name );
1359 _printer.printText( " [" );
1360 printText( dtd, true, true );
1361 _printer.printText( "]>" );
1362 _printer.breakLine();
1363 }
1364 }
1365 }
1366
1367
1368 /**
1369 * Must be called by a method about to print any type of content.
1370 * If the element was just opened, the opening tag is closed and
1371 * will be matched to a closing tag. Returns the current element
1372 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1373 *
1374 * @return The current element state
1375 * @throws IOException An I/O exception occured while
1376 * serializing
1377 */
1378 protected ElementState content()
1379 throws IOException
1380 {
1381 ElementState state;
1382
1383 state = getElementState();
1384 if ( ! isDocumentState() ) {
1385 // Need to close CData section first
1386 if ( state.inCData && ! state.doCData ) {
1387 _printer.printText( "]]>" );
1388 state.inCData = false;
1389 }
1390 // If this is the first content in the element,
1391 // change the state to not-empty and close the
1392 // opening element tag.
1393 if ( state.empty ) {
1394 _printer.printText( '>' );
1395 state.empty = false;
1396 }
1397 // Except for one content type, all of them
1398 // are not last element. That one content
1399 // type will take care of itself.
1400 state.afterElement = false;
1401 // Except for one content type, all of them
1402 // are not last comment. That one content
1403 // type will take care of itself.
1404 state.afterComment = false;
1405 }
1406 return state;
1407 }
1408
1409
1410 /**
1411 * Called to print the text contents in the prevailing element format.
1412 * Since this method is capable of printing text as CDATA, it is used
1413 * for that purpose as well. White space handling is determined by the
1414 * current element state. In addition, the output format can dictate
1415 * whether the text is printed as CDATA or unescaped.
1416 *
1417 * @param text The text to print
1418 * @param unescaped True is should print unescaped
1419 * @throws IOException An I/O exception occured while
1420 * serializing
1421 */
1422 protected void characters( String text )
1423 throws IOException
1424 {
1425 ElementState state;
1426
1427 state = content();
1428 // Check if text should be print as CDATA section or unescaped
1429 // based on elements listed in the output format (the element
1430 // state) or whether we are inside a CDATA section or entity.
1431
1432 if ( state.inCData || state.doCData ) {
1433 int index;
1434 int saveIndent;
1435
1436 // Print a CDATA section. The text is not escaped, but ']]>'
1437 // appearing in the code must be identified and dealt with.
1438 // The contents of a text node is considered space preserving.
1439 if ( ! state.inCData ) {
1440 _printer.printText("<![CDATA[");
1441 state.inCData = true;
1442 }
1443 saveIndent = _printer.getNextIndent();
1444 _printer.setNextIndent( 0 );
1445 printCDATAText( text);
1446 _printer.setNextIndent( saveIndent );
1447
1448 } else {
1449
1450 int saveIndent;
1451
1452 if ( state.preserveSpace ) {
1453 // If preserving space then hold of indentation so no
1454 // excessive spaces are printed at line breaks, escape
1455 // the text content without replacing spaces and print
1456 // the text breaking only at line breaks.
1457 saveIndent = _printer.getNextIndent();
1458 _printer.setNextIndent( 0 );
1459 printText( text, true, state.unescaped );
1460 _printer.setNextIndent( saveIndent );
1461 } else {
1462 printText( text, false, state.unescaped );
1463 }
1464 }
1465 }
1466
1467
1468 /**
1469 * Returns the suitable entity reference for this character value,
1470 * or null if no such entity exists. Calling this method with <tt>'&'</tt>
1471 * will return <tt>"&amp;"</tt>.
1472 *
1473 * @param ch Character value
1474 * @return Character entity name, or null
1475 */
1476 protected abstract String getEntityRef( int ch );
1477
1478
1479 /**
1480 * Called to serializee the DOM element. The element is serialized based on
1481 * the serializer's method (XML, HTML, XHTML).
1482 *
1483 * @param elem The element to serialize
1484 * @throws IOException An I/O exception occured while
1485 * serializing
1486 */
1487 protected abstract void serializeElement( Element elem )
1488 throws IOException;
1489
1490
1491 /**
1492 * Comments and PIs cannot be serialized before the root element,
1493 * because the root element serializes the document type, which
1494 * generally comes first. Instead such PIs and comments are
1495 * accumulated inside a vector and serialized by calling this
1496 * method. Will be called when the root element is serialized
1497 * and when the document finished serializing.
1498 *
1499 * @throws IOException An I/O exception occured while
1500 * serializing
1501 */
1502 protected void serializePreRoot()
1503 throws IOException
1504 {
1505 int i;
1506
1507 if ( _preRoot != null ) {
1508 for ( i = 0 ; i < _preRoot.size() ; ++i ) {
1509 printText( (String) _preRoot.elementAt( i ), true, true );
1510 if ( _indenting )
1511 _printer.breakLine();
1512 }
1513 _preRoot.removeAllElements();
1514 }
1515 }
1516
1517
1518 //---------------------------------------------//
1519 // Text pretty printing and formatting methods //
1520 //---------------------------------------------//
1521
1522 protected void printCDATAText( String text ) throws IOException {
1523 int length = text.length();
1524 char ch;
1525
1526 for ( int index = 0 ; index < length; ++index ) {
1527 ch = text.charAt( index );
1528 if (ch == ']'
1529 && index + 2 < length
1530 && text.charAt(index + 1) == ']'
1531 && text.charAt(index + 2) == '>') { // check for ']]>'
1532 if (fDOMErrorHandler != null) {
1533 // REVISIT: this means that if DOM Error handler is not registered we don't report any
1534 // fatal errors and might serialize not wellformed document
1535 if ((features & DOMSerializerImpl.SPLITCDATA) == 0) {
1536 String msg = DOMMessageFormatter.formatMessage(
1537 DOMMessageFormatter.SERIALIZER_DOMAIN,
1538 "EndingCDATA",
1539 null);
1540 if ((features & DOMSerializerImpl.WELLFORMED) != 0) {
1541 // issue fatal error
1542 modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode);
1543 fDOMErrorHandler.handleError(fDOMError);
1544 throw new LSException(LSException.SERIALIZE_ERR, msg);
1545 }
1546 else {
1547 // issue error
1548 modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode);
1549 if (!fDOMErrorHandler.handleError(fDOMError)) {
1550 throw new LSException(LSException.SERIALIZE_ERR, msg);
1551 }
1552 }
1553 } else {
1554 // issue warning
1555 String msg =
1556 DOMMessageFormatter.formatMessage(
1557 DOMMessageFormatter.SERIALIZER_DOMAIN,
1558 "SplittingCDATA",
1559 null);
1560 modifyDOMError(
1561 msg,
1562 DOMError.SEVERITY_WARNING,
1563 null, fCurrentNode);
1564 fDOMErrorHandler.handleError(fDOMError);
1565 }
1566 }
1567 // split CDATA section
1568 _printer.printText("]]]]><![CDATA[>");
1569 index += 2;
1570 continue;
1571 }
1572
1573 if (!XMLChar.isValid(ch)) {
1574 // check if it is surrogate
1575 if (++index <length) {
1576 surrogates(ch, text.charAt(index));
1577 }
1578 else {
1579 fatalError("The character '"+(char)ch+"' is an invalid XML character");
1580 }
1581 continue;
1582 } else {
1583 if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
1584 ch == '\n' || ch == '\r' || ch == '\t' ) {
1585 _printer.printText((char)ch);
1586 } else {
1587
1588 // The character is not printable -- split CDATA section
1589 _printer.printText("]]>&#x");
1590 _printer.printText(Integer.toHexString(ch));
1591 _printer.printText(";<![CDATA[");
1592 }
1593 }
1594 }
1595 }
1596
1597
1598 protected void surrogates(int high, int low) throws IOException{
1599 if (XMLChar.isHighSurrogate(high)) {
1600 if (!XMLChar.isLowSurrogate(low)) {
1601 //Invalid XML
1602 fatalError("The character '"+(char)low+"' is an invalid XML character");
1603 }
1604 else {
1605 int supplemental = XMLChar.supplemental((char)high, (char)low);
1606 if (!XMLChar.isValid(supplemental)) {
1607 //Invalid XML
1608 fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
1609 }
1610 else {
1611 if (content().inCData ) {
1612 _printer.printText("]]>&#x");
1613 _printer.printText(Integer.toHexString(supplemental));
1614 _printer.printText(";<![CDATA[");
1615 }
1616 else {
1617 printHex(supplemental);
1618 }
1619 }
1620 }
1621 } else {
1622 fatalError("The character '"+(char)high+"' is an invalid XML character");
1623 }
1624
1625 }
1626
1627 /**
1628 * Called to print additional text with whitespace handling.
1629 * If spaces are preserved, the text is printed as if by calling
1630 * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine}
1631 * for each new line. If spaces are not preserved, the text is
1632 * broken at space boundaries if longer than the line width;
1633 * Multiple spaces are printed as such, but spaces at beginning
1634 * of line are removed.
1635 *
1636 * @param text The text to print
1637 * @param preserveSpace Space preserving flag
1638 * @param unescaped Print unescaped
1639 */
1640 protected void printText( char[] chars, int start, int length,
1641 boolean preserveSpace, boolean unescaped )
1642 throws IOException
1643 {
1644 int index;
1645 char ch;
1646
1647 if ( preserveSpace ) {
1648 // Preserving spaces: the text must print exactly as it is,
1649 // without breaking when spaces appear in the text and without
1650 // consolidating spaces. If a line terminator is used, a line
1651 // break will occur.
1652 while ( length-- > 0 ) {
1653 ch = chars[ start ];
1654 ++start;
1655 if ( ch == '\n' || ch == '\r' || unescaped )
1656 _printer.printText( ch );
1657 else
1658 printEscaped( ch );
1659 }
1660 } else {
1661 // Not preserving spaces: print one part at a time, and
1662 // use spaces between parts to break them into different
1663 // lines. Spaces at beginning of line will be stripped
1664 // by printing mechanism. Line terminator is treated
1665 // no different than other text part.
1666 while ( length-- > 0 ) {
1667 ch = chars[ start ];
1668 ++start;
1669 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1670 _printer.printSpace();
1671 else if ( unescaped )
1672 _printer.printText( ch );
1673 else
1674 printEscaped( ch );
1675 }
1676 }
1677 }
1678
1679
1680 protected void printText( String text, boolean preserveSpace, boolean unescaped )
1681 throws IOException
1682 {
1683 int index;
1684 char ch;
1685
1686 if ( preserveSpace ) {
1687 // Preserving spaces: the text must print exactly as it is,
1688 // without breaking when spaces appear in the text and without
1689 // consolidating spaces. If a line terminator is used, a line
1690 // break will occur.
1691 for ( index = 0 ; index < text.length() ; ++index ) {
1692 ch = text.charAt( index );
1693 if ( ch == '\n' || ch == '\r' || unescaped )
1694 _printer.printText( ch );
1695 else
1696 printEscaped( ch );
1697 }
1698 } else {
1699 // Not preserving spaces: print one part at a time, and
1700 // use spaces between parts to break them into different
1701 // lines. Spaces at beginning of line will be stripped
1702 // by printing mechanism. Line terminator is treated
1703 // no different than other text part.
1704 for ( index = 0 ; index < text.length() ; ++index ) {
1705 ch = text.charAt( index );
1706 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1707 _printer.printSpace();
1708 else if ( unescaped )
1709 _printer.printText( ch );
1710 else
1711 printEscaped( ch );
1712 }
1713 }
1714 }
1715
1716
1717 /**
1718 * Print a document type public or system identifier URL.
1719 * Encapsulates the URL in double quotes, escapes non-printing
1720 * characters and print it equivalent to {@link #printText}.
1721 *
1722 * @param url The document type url to print
1723 */
1724 protected void printDoctypeURL( String url )
1725 throws IOException
1726 {
1727 int i;
1728
1729 _printer.printText( '"' );
1730 for( i = 0 ; i < url.length() ; ++i ) {
1731 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1732 _printer.printText( '%' );
1733 _printer.printText( Integer.toHexString( url.charAt( i ) ) );
1734 } else
1735 _printer.printText( url.charAt( i ) );
1736 }
1737 _printer.printText( '"' );
1738 }
1739
1740
1741 protected void printEscaped( int ch )
1742 throws IOException
1743 {
1744 String charRef;
1745 // If there is a suitable entity reference for this
1746 // character, print it. The list of available entity
1747 // references is almost but not identical between
1748 // XML and HTML.
1749 charRef = getEntityRef( ch );
1750 if ( charRef != null ) {
1751 _printer.printText( '&' );
1752 _printer.printText( charRef );
1753 _printer.printText( ';' );
1754 } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
1755 ch == '\n' || ch == '\r' || ch == '\t' ) {
1756 // Non printables are below ASCII space but not tab or line
1757 // terminator, ASCII delete, or above a certain Unicode threshold.
1758 if (ch < 0x10000) {
1759 _printer.printText((char)ch );
1760 } else {
1761 _printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1762 _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1763 }
1764 } else {
1765 printHex(ch);
1766 }
1767 }
1768
1769 /**
1770 * Escapes chars
1771 */
1772 final void printHex( int ch) throws IOException {
1773 _printer.printText( "&#x" );
1774 _printer.printText(Integer.toHexString(ch));
1775 _printer.printText( ';' );
1776
1777 }
1778
1779
1780 /**
1781 * Escapes a string so it may be printed as text content or attribute
1782 * value. Non printable characters are escaped using character references.
1783 * Where the format specifies a deault entity reference, that reference
1784 * is used (e.g. <tt>&lt;</tt>).
1785 *
1786 * @param source The string to escape
1787 */
1788 protected void printEscaped( String source )
1789 throws IOException
1790 {
1791 for ( int i = 0 ; i < source.length() ; ++i ) {
1792 int ch = source.charAt(i);
1793 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1794 int lowch = source.charAt(i+1);
1795 if ((lowch & 0xfc00) == 0xdc00) {
1796 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1797 i++;
1798 }
1799 }
1800 printEscaped(ch);
1801 }
1802 }
1803
1804
1805 //--------------------------------//
1806 // Element state handling methods //
1807 //--------------------------------//
1808
1809
1810 /**
1811 * Return the state of the current element.
1812 *
1813 * @return Current element state
1814 */
1815 protected ElementState getElementState()
1816 {
1817 return _elementStates[ _elementStateCount ];
1818 }
1819
1820
1821 /**
1822 * Enter a new element state for the specified element.
1823 * Tag name and space preserving is specified, element
1824 * state is initially empty.
1825 *
1826 * @return Current element state, or null
1827 */
1828 protected ElementState enterElementState( String namespaceURI, String localName,
1829 String rawName, boolean preserveSpace )
1830 {
1831 ElementState state;
1832
1833 if ( _elementStateCount + 1 == _elementStates.length ) {
1834 ElementState[] newStates;
1835
1836 // Need to create a larger array of states. This does not happen
1837 // often, unless the document is really deep.
1838 newStates = new ElementState[ _elementStates.length + 10 ];
1839 for ( int i = 0 ; i < _elementStates.length ; ++i )
1840 newStates[ i ] = _elementStates[ i ];
1841 for ( int i = _elementStates.length ; i < newStates.length ; ++i )
1842 newStates[ i ] = new ElementState();
1843 _elementStates = newStates;
1844 }
1845
1846 ++_elementStateCount;
1847 state = _elementStates[ _elementStateCount ];
1848 state.namespaceURI = namespaceURI;
1849 state.localName = localName;
1850 state.rawName = rawName;
1851 state.preserveSpace = preserveSpace;
1852 state.empty = true;
1853 state.afterElement = false;
1854 state.afterComment = false;
1855 state.doCData = state.inCData = false;
1856 state.unescaped = false;
1857 state.prefixes = _prefixes;
1858
1859 _prefixes = null;
1860 return state;
1861 }
1862
1863
1864 /**
1865 * Leave the current element state and return to the
1866 * state of the parent element. If this was the root
1867 * element, return to the state of the document.
1868 *
1869 * @return Previous element state
1870 */
1871 protected ElementState leaveElementState()
1872 {
1873 if ( _elementStateCount > 0 ) {
1874 /*Corrected by David Blondeau (blondeau@intalio.com)*/
1875 _prefixes = null;
1876 //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1877 -- _elementStateCount;
1878 return _elementStates[ _elementStateCount ];
1879 } else {
1880 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null);
1881 throw new IllegalStateException(msg);
1882 }
1883 }
1884
1885
1886 /**
1887 * Returns true if in the state of the document.
1888 * Returns true before entering any element and after
1889 * leaving the root element.
1890 *
1891 * @return True if in the state of the document
1892 */
1893 protected boolean isDocumentState()
1894 {
1895 return _elementStateCount == 0;
1896 }
1897
1898
1899 /**
1900 * Returns the namespace prefix for the specified URI.
1901 * If the URI has been mapped to a prefix, returns the
1902 * prefix, otherwise returns null.
1903 *
1904 * @param namespaceURI The namespace URI
1905 * @return The namespace prefix if known, or null
1906 */
1907 protected String getPrefix( String namespaceURI )
1908 {
1909 String prefix;
1910
1911 if ( _prefixes != null ) {
1912 prefix = (String) _prefixes.get( namespaceURI );
1913 if ( prefix != null )
1914 return prefix;
1915 }
1916 if ( _elementStateCount == 0 )
1917 return null;
1918 else {
1919 for ( int i = _elementStateCount ; i > 0 ; --i ) {
1920 if ( _elementStates[ i ].prefixes != null ) {
1921 prefix = (String) _elementStates[ i ].prefixes.get( namespaceURI );
1922 if ( prefix != null )
1923 return prefix;
1924 }
1925 }
1926 }
1927 return null;
1928 }
1929
1930 /**
1931 * The method modifies global DOM error object
1932 *
1933 * @param message
1934 * @param severity
1935 * @param type
1936 * @return a DOMError
1937 */
1938 protected DOMError modifyDOMError(String message, short severity, String type, Node node){
1939 fDOMError.reset();
1940 fDOMError.fMessage = message;
1941 fDOMError.fType = type;
1942 fDOMError.fSeverity = severity;
1943 fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
1944 return fDOMError;
1945
1946 }
1947
1948
1949 protected void fatalError(String message) throws IOException{
1950 if (fDOMErrorHandler != null) {
1951 modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode);
1952 fDOMErrorHandler.handleError(fDOMError);
1953 }
1954 else {
1955 throw new IOException(message);
1956 }
1957 }
1958
1959 /**
1960 * DOM level 3:
1961 * Check a node to determine if it contains unbound namespace prefixes.
1962 *
1963 * @param node The node to check for unbound namespace prefices
1964 */
1965 protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{
1966
1967 }
1968 }