1 /*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5 /*
6 * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21
22
23 // Sep 14, 2000:
24 // Fixed problem with namespace handling. Contributed by
25 // David Blondeau <blondeau@intalio.com>
26 // Sep 14, 2000:
27 // Fixed serializer to report IO exception directly, instead at
28 // the end of document processing.
29 // Reported by Patrick Higgins <phiggins@transzap.com>
30 // Aug 21, 2000:
31 // Fixed bug in startDocument not calling prepare.
32 // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
33 // Aug 21, 2000:
34 // Added ability to omit DOCTYPE declaration.
35
36
37 package com.sun.org.apache.xml.internal.serialize;
38
39
40 import java.io.IOException;
41 import java.io.OutputStream;
42 import java.io.Writer;
43
44 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
45 import com.sun.org.apache.xerces.internal.impl.Constants;
46 import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
47 import com.sun.org.apache.xerces.internal.util.SymbolTable;
48 import com.sun.org.apache.xerces.internal.util.XML11Char;
49 import com.sun.org.apache.xerces.internal.util.XMLChar;
50 import org.xml.sax.SAXException;
51 import org.w3c.dom.DOMError;
52
53 /**
54 * Implements an XML serializer supporting both DOM and SAX pretty
55 * serializing. For usage instructions see {@link Serializer}.
56 * <p>
57 * If an output stream is used, the encoding is taken from the
58 * output format (defaults to <tt>UTF-8</tt>). If a writer is
59 * used, make sure the writer uses the same encoding (if applies)
60 * as specified in the output format.
61 * <p>
62 * The serializer supports both DOM and SAX. SAX serializing is done by firing
63 * SAX events and using the serializer as a document handler. DOM serializing is done
64 * by calling {@link #serialize(Document)} or by using DOM Level 3
65 * {@link org.w3c.dom.ls.DOMSerializer} and
66 * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
67 * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
68 * <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}).
73 * <p>
74 * For elements that are not specified as whitespace preserving,
75 * the serializer will potentially break long text lines at space
76 * boundaries, indent lines, and serialize elements on separate
77 * lines. Line terminators will be regarded as spaces, and
78 * spaces at beginning of line will be stripped.
79 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
80 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
81 * @author Elena Litani IBM
82 * @see Serializer
83 */
84 public class XML11Serializer
85 extends XMLSerializer {
86
//
// constants
//

/** Debug switch; a compile-time {@code false} that no method of this class reads. */
protected static final boolean DEBUG = false;

//
// data
//

//
// DOM Level 3 implementation: variables initialized in DOMSerializerImpl
//
// NOTE(review): none of the fields below is read or written by the methods
// of this class. The superclass may declare fields with the same names, in
// which case these declarations hide them -- confirm before relying on them.
//

/** stores namespaces in scope */
protected NamespaceSupport fNSBinder;

/** stores all namespace bindings on the current element */
protected NamespaceSupport fLocalNSBinder;

/** symbol table for serialization */
protected SymbolTable fSymbolTable;

// is node dom level 1 node?
protected boolean fDOML1 = false;
// counter for new prefix names
protected int fNamespaceCounter = 1;
// presumably the stem combined with fNamespaceCounter to build generated
// prefix names -- TODO confirm against the code that performs namespace fixup
protected final static String PREFIX = "NS";

/**
 * Controls whether namespace fixup should be performed during
 * the serialization.
 * NOTE: if this field is set to true the following
 * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
 * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
 */
protected boolean fNamespaces = false;


// NOTE(review): private and never referenced inside this class; looks like
// a leftover -- confirm it can be removed.
private boolean fPreserveSpace;
127
128
/**
 * Constructs a new serializer. The serializer cannot be used without
 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 * first.
 * <p>
 * After the superclass constructor has run, the version of the output
 * format held in {@code _format} is set to {@code "1.1"}.
 */
public XML11Serializer() {
    super( );
    // Runs after super(), so _format is expected to be populated by the
    // superclass constructor -- assumption, not visible in this file.
    _format.setVersion("1.1");
}
138
139
/**
 * Constructs a new serializer that uses the specified output format.
 * The serializer cannot be used without
 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
 * first.
 * <p>
 * After the superclass constructor has run, the version of the output
 * format held in {@code _format} is set to {@code "1.1"}.
 *
 * @param format The output format to use; it is handed to the superclass
 *               constructor unchanged
 */
public XML11Serializer( OutputFormat format ) {
    super( format );
    // NOTE(review): if the superclass keeps the caller's OutputFormat by
    // reference, this call changes the caller's object too -- confirm.
    _format.setVersion("1.1");
}
149
150
/**
 * Constructs a new serializer that writes to the specified writer
 * using the specified output format. If <tt>format</tt> is null,
 * will use a default output format.
 * <p>
 * After the superclass constructor has run, the version of the output
 * format held in {@code _format} is set to {@code "1.1"}.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( Writer writer, OutputFormat format ) {
    // format is passed through as-is; the null-to-default substitution
    // promised above is presumably done by the superclass -- TODO confirm.
    super( writer, format );
    _format.setVersion("1.1");
}
163
164
/**
 * Constructs a new serializer that writes to the specified output
 * stream using the specified output format. If <tt>format</tt>
 * is null, will use a default output format.
 * <p>
 * After the superclass constructor has run, the version of the output
 * format held in {@code _format} is set to {@code "1.1"}.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( OutputStream output, OutputFormat format ) {
    // A null format is replaced here, before delegating, by
    // new OutputFormat( Method.XML, null, false ).
    super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
    _format.setVersion("1.1");
}
177
178 //-----------------------------------------//
179 // SAX content handler serializing methods //
180 //-----------------------------------------//
181
182
183 public void characters( char[] chars, int start, int length )
184 throws SAXException
185 {
186 ElementState state;
187
188 try {
189 state = content();
190
191 // Check if text should be print as CDATA section or unescaped
192 // based on elements listed in the output format (the element
193 // state) or whether we are inside a CDATA section or entity.
194
195 if ( state.inCData || state.doCData ) {
196 int saveIndent;
197
198 // Print a CDATA section. The text is not escaped, but ']]>'
199 // appearing in the code must be identified and dealt with.
200 // The contents of a text node is considered space preserving.
201 if ( ! state.inCData ) {
202 _printer.printText( "<![CDATA[" );
203 state.inCData = true;
204 }
205 saveIndent = _printer.getNextIndent();
206 _printer.setNextIndent( 0 );
207 char ch;
208 final int end = start + length;
209 for ( int index = start; index < end; ++index ) {
210 ch = chars[index];
211 if ( ch == ']' && index + 2 < end &&
212 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
213 _printer.printText("]]]]><![CDATA[>");
214 index +=2;
215 continue;
216 }
217 if (!XML11Char.isXML11Valid(ch)) {
218 // check if it is surrogate
219 if (++index < end) {
220 surrogates(ch, chars[index]);
221 }
222 else {
223 fatalError("The character '"+(char)ch+"' is an invalid XML character");
224 }
225 continue;
226 } else {
227 if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
228 _printer.printText((char)ch);
229 } else {
230 // The character is not printable -- split CDATA section
231 _printer.printText("]]>&#x");
232 _printer.printText(Integer.toHexString(ch));
233 _printer.printText(";<![CDATA[");
234 }
235 }
236 }
237 _printer.setNextIndent( saveIndent );
238
239 } else {
240
241 int saveIndent;
242
243 if ( state.preserveSpace ) {
244 // If preserving space then hold of indentation so no
245 // excessive spaces are printed at line breaks, escape
246 // the text content without replacing spaces and print
247 // the text breaking only at line breaks.
248 saveIndent = _printer.getNextIndent();
249 _printer.setNextIndent( 0 );
250 printText( chars, start, length, true, state.unescaped );
251 _printer.setNextIndent( saveIndent );
252 } else {
253 printText( chars, start, length, false, state.unescaped );
254 }
255 }
256 } catch ( IOException except ) {
257 throw new SAXException( except );
258 }
259 }
260
261
262 //
// override the printing functions to make sure the serializer prints out valid XML
264 //
265 protected void printEscaped( String source ) throws IOException {
266 int length = source.length();
267 for ( int i = 0 ; i < length ; ++i ) {
268 int ch = source.charAt(i);
269 if (!XML11Char.isXML11Valid(ch)) {
270 if (++i <length) {
271 surrogates(ch, source.charAt(i));
272 } else {
273 fatalError("The character '"+(char)ch+"' is an invalid XML character");
274 }
275 continue;
276 }
277 if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
278 printHex(ch);
279 } else if (ch == '<') {
280 _printer.printText("<");
281 } else if (ch == '&') {
282 _printer.printText("&");
283 } else if (ch == '"') {
284 _printer.printText(""");
285 } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
286 _printer.printText((char) ch);
287 } else {
288 printHex(ch);
289 }
290 }
291 }
292
293 protected final void printCDATAText(String text) throws IOException {
294 int length = text.length();
295 char ch;
296
297 for (int index = 0; index < length; ++index) {
298 ch = text.charAt(index);
299
300 if (ch == ']'
301 && index + 2 < length
302 && text.charAt(index + 1) == ']'
303 && text.charAt(index + 2) == '>') { // check for ']]>'
304 if (fDOMErrorHandler != null){
305 // REVISIT: this means that if DOM Error handler is not registered we don't report any
306 // fatal errors and might serialize not wellformed document
307 if ((features & DOMSerializerImpl.SPLITCDATA) == 0
308 && (features & DOMSerializerImpl.WELLFORMED) == 0) {
309 // issue fatal error
310 String msg =
311 DOMMessageFormatter.formatMessage(
312 DOMMessageFormatter.SERIALIZER_DOMAIN,
313 "EndingCDATA",
314 null);
315 modifyDOMError(
316 msg,
317 DOMError.SEVERITY_FATAL_ERROR,
318 null, fCurrentNode);
319 boolean continueProcess =
320 fDOMErrorHandler.handleError(fDOMError);
321 if (!continueProcess) {
322 throw new IOException();
323 }
324 } else {
325 // issue warning
326 String msg =
327 DOMMessageFormatter.formatMessage(
328 DOMMessageFormatter.SERIALIZER_DOMAIN,
329 "SplittingCDATA",
330 null);
331 modifyDOMError(
332 msg,
333 DOMError.SEVERITY_WARNING,
334 null, fCurrentNode);
335 fDOMErrorHandler.handleError(fDOMError);
336 }
337 }
338 // split CDATA section
339 _printer.printText("]]]]><![CDATA[>");
340 index += 2;
341 continue;
342 }
343
344 if (!XML11Char.isXML11Valid(ch)) {
345 // check if it is surrogate
346 if (++index < length) {
347 surrogates(ch, text.charAt(index));
348 } else {
349 fatalError(
350 "The character '"
351 + (char) ch
352 + "' is an invalid XML character");
353 }
354 continue;
355 } else {
356 if (_encodingInfo.isPrintable((char) ch)
357 && XML11Char.isXML11ValidLiteral(ch)) {
358 _printer.printText((char) ch);
359 } else {
360
361 // The character is not printable -- split CDATA section
362 _printer.printText("]]>&#x");
363 _printer.printText(Integer.toHexString(ch));
364 _printer.printText(";<![CDATA[");
365 }
366 }
367 }
368 }
369
370
371 // note that this "int" should, in all cases, be a char.
372 // REVISIT: make it a char...
373 protected final void printXMLChar( int ch ) throws IOException {
374
375 if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
376 printHex(ch);
377 } else if ( ch == '<') {
378 _printer.printText("<");
379 } else if (ch == '&') {
380 _printer.printText("&");
381 } else if (ch == '>'){
382 // character sequence "]]>" can't appear in content, therefore
383 // we should escape '>'
384 _printer.printText(">");
385 } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
386 _printer.printText((char)ch);
387 } else {
388 printHex(ch);
389 }
390 }
391
392
393
394 protected final void surrogates(int high, int low) throws IOException{
395 if (XMLChar.isHighSurrogate(high)) {
396 if (!XMLChar.isLowSurrogate(low)) {
397 //Invalid XML
398 fatalError("The character '"+(char)low+"' is an invalid XML character");
399 }
400 else {
401 int supplemental = XMLChar.supplemental((char)high, (char)low);
402 if (!XML11Char.isXML11Valid(supplemental)) {
403 //Invalid XML
404 fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
405 }
406 else {
407 if (content().inCData ) {
408 _printer.printText("]]>&#x");
409 _printer.printText(Integer.toHexString(supplemental));
410 _printer.printText(";<![CDATA[");
411 }
412 else {
413 printHex(supplemental);
414 }
415 }
416 }
417 } else {
418 fatalError("The character '"+(char)high+"' is an invalid XML character");
419 }
420
421 }
422
423
424 protected void printText( String text, boolean preserveSpace, boolean unescaped )
425 throws IOException {
426 int index;
427 char ch;
428 int length = text.length();
429 if ( preserveSpace ) {
430 // Preserving spaces: the text must print exactly as it is,
431 // without breaking when spaces appear in the text and without
432 // consolidating spaces. If a line terminator is used, a line
433 // break will occur.
434 for ( index = 0 ; index < length ; ++index ) {
435 ch = text.charAt( index );
436 if (!XML11Char.isXML11Valid(ch)) {
437 // check if it is surrogate
438 if (++index <length) {
439 surrogates(ch, text.charAt(index));
440 } else {
441 fatalError("The character '"+(char)ch+"' is an invalid XML character");
442 }
443 continue;
444 }
445 if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
446 _printer.printText( ch );
447 } else
448 printXMLChar( ch );
449 }
450 } else {
451 // Not preserving spaces: print one part at a time, and
452 // use spaces between parts to break them into different
453 // lines. Spaces at beginning of line will be stripped
454 // by printing mechanism. Line terminator is treated
455 // no different than other text part.
456 for ( index = 0 ; index < length ; ++index ) {
457 ch = text.charAt( index );
458 if (!XML11Char.isXML11Valid(ch)) {
459 // check if it is surrogate
460 if (++index <length) {
461 surrogates(ch, text.charAt(index));
462 } else {
463 fatalError("The character '"+(char)ch+"' is an invalid XML character");
464 }
465 continue;
466 }
467
468 if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
469 _printer.printText( ch );
470 else
471 printXMLChar( ch);
472 }
473 }
474 }
475
476
477
478 protected void printText( char[] chars, int start, int length,
479 boolean preserveSpace, boolean unescaped ) throws IOException {
480 int index;
481 char ch;
482
483 if ( preserveSpace ) {
484 // Preserving spaces: the text must print exactly as it is,
485 // without breaking when spaces appear in the text and without
486 // consolidating spaces. If a line terminator is used, a line
487 // break will occur.
488 while ( length-- > 0 ) {
489 ch = chars[start++];
490 if (!XML11Char.isXML11Valid(ch)) {
491 // check if it is surrogate
492 if ( length-- > 0) {
493 surrogates(ch, chars[start++]);
494 } else {
495 fatalError("The character '"+(char)ch+"' is an invalid XML character");
496 }
497 continue;
498 }
499 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
500 _printer.printText( ch );
501 else
502 printXMLChar( ch );
503 }
504 } else {
505 // Not preserving spaces: print one part at a time, and
506 // use spaces between parts to break them into different
507 // lines. Spaces at beginning of line will be stripped
508 // by printing mechanism. Line terminator is treated
509 // no different than other text part.
510 while ( length-- > 0 ) {
511 ch = chars[start++];
512 if (!XML11Char.isXML11Valid(ch)) {
513 // check if it is surrogate
514 if ( length-- > 0) {
515 surrogates(ch, chars[start++]);
516 } else {
517 fatalError("The character '"+(char)ch+"' is an invalid XML character");
518 }
519 continue;
520 }
521
522 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
523 _printer.printText( ch );
524 else
525 printXMLChar( ch );
526 }
527 }
528 }
529
530
/**
 * Resets the serializer by delegating to the superclass implementation.
 * <p>
 * The value returned by {@code super.reset()} is discarded.
 *
 * @return always {@code true}
 */
public boolean reset() {
    super.reset();
    return true;

}
536
537 }