Source code: org/jdom/output/Format.java
1 /*--
2
3 $Id: Format.java,v 1.10 2004/09/07 06:37:20 jhunter Exp $
4
5 Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom.output;
58
59 import java.lang.reflect.Method;
60
61 /**
62 * Class to encapsulate XMLOutputter format options.
63 * Typical users can use the standard format configurations obtained by
64 * {@link #getRawFormat} (no whitespace changes),
65 * {@link #getPrettyFormat} (whitespace beautification), and
66 * {@link #getCompactFormat} (whitespace normalization).
67 * <p>
68 * Several modes are available to effect the way textual content is printed.
69 * See the documentation for {@link TextMode} for details.
70 *
71 * @version $Revision: 1.10 $, $Date: 2004/09/07 06:37:20 $
72 * @author Jason Hunter
73 */
74 public class Format implements Cloneable {
75
76 private static final String CVS_ID =
77 "@(#) $RCSfile: Format.java,v $ $Revision: 1.10 $ $Date: 2004/09/07 06:37:20 $ $Name: jdom_1_0 $";
78
79 /**
80 * Returns a new Format object that performs no whitespace changes, uses
81 * the UTF-8 encoding, doesn't expand empty elements, includes the
82 * declaration and encoding, and uses the default entity escape strategy.
83 * Tweaks can be made to the returned Format instance without affecting
84 * other instances.
85
86 * @return a Format with no whitespace changes
87 */
88 public static Format getRawFormat() {
89 return new Format();
90 }
91
92 /**
93 * Returns a new Format object that performs whitespace beautification with
94 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
95 * includes the declaration and encoding, and uses the default entity
96 * escape strategy.
97 * Tweaks can be made to the returned Format instance without affecting
98 * other instances.
99 *
100 * @return a Format with whitespace beautification
101 */
102 public static Format getPrettyFormat() {
103 Format f = new Format();
104 f.setIndent(STANDARD_INDENT);
105 f.setTextMode(TextMode.TRIM);
106 return f;
107 }
108
109 /**
110 * Returns a new Format object that performs whitespace normalization, uses
111 * the UTF-8 encoding, doesn't expand empty elements, includes the
112 * declaration and encoding, and uses the default entity escape strategy.
113 * Tweaks can be made to the returned Format instance without affecting
114 * other instances.
115 *
116 * @return a Format with whitespace normalization
117 */
118 public static Format getCompactFormat() {
119 Format f = new Format();
120 f.setTextMode(TextMode.NORMALIZE);
121 return f;
122 }
123
124 /** standard value to indent by, if we are indenting */
125 private static final String STANDARD_INDENT = " ";
126
127 /** standard string with which to end a line */
128 private static final String STANDARD_LINE_SEPARATOR = "\r\n";
129
130 /** standard encoding */
131 private static final String STANDARD_ENCODING = "UTF-8";
132
133
134 /** The default indent is no spaces (as original document) */
135 String indent = null;
136
137 /** New line separator */
138 String lineSeparator = STANDARD_LINE_SEPARATOR;
139
140 /** The encoding format */
141 String encoding = STANDARD_ENCODING;
142
143 /** Whether or not to output the XML declaration
144 * - default is <code>false</code> */
145 boolean omitDeclaration = false;
146
147 /** Whether or not to output the encoding in the XML declaration
148 * - default is <code>false</code> */
149 boolean omitEncoding = false;
150
151 /** Whether or not to expand empty elements to
152 * <tagName></tagName> - default is <code>false</code> */
153 boolean expandEmptyElements = false;
154
155 /** Whether TrAX output escaping disabling/enabling PIs are ignored
156 * or processed - default is <code>false</code> */
157 boolean ignoreTrAXEscapingPIs = false;
158
159 /** text handling mode */
160 TextMode mode = TextMode.PRESERVE;
161
162 /** entity escape logic */
163 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
164
165 /**
166 * Creates a new Format instance with default (raw) behavior.
167 */
168 private Format() { }
169
170 /**
171 * Sets the {@link EscapeStrategy} to use for character escaping.
172 *
173 * @param strategy the EscapeStrategy to use
174 * @return a pointer to this Format for chaining
175 */
176 public Format setEscapeStrategy(EscapeStrategy strategy) {
177 escapeStrategy = strategy;
178 return this;
179 }
180
181 /**
182 * Returns the current escape strategy
183 *
184 * @return the current escape strategy
185 */
186 public EscapeStrategy getEscapeStrategy() {
187 return escapeStrategy;
188 }
189
190 /**
191 * This will set the newline separator (<code>lineSeparator</code>).
192 * The default is <code>\r\n</code>. Note that if the "newlines"
193 * property is false, this value is irrelevant. To make it output
194 * the system default line ending string, call
195 * <code>setLineSeparator(System.getProperty("line.separator"))</code>
196 *
197 * <p>
198 * To output "UNIX-style" documents, call
199 * <code>setLineSeparator("\n")</code>. To output "Mac-style"
200 * documents, call <code>setLineSeparator("\r")</code>. DOS-style
201 * documents use CR-LF ("\r\n"), which is the default.
202 * </p>
203 *
204 * <p>
205 * Note that this only applies to newlines generated by the
206 * outputter. If you parse an XML document that contains newlines
207 * embedded inside a text node, and you do not set TextMode.NORMALIZE,
208 * then the newlines will be output
209 * verbatim, as "\n" which is how parsers normalize them.
210 * </p>
211 *
212 * @see #setTextMode
213 *
214 * @param separator <code>String</code> line separator to use.
215 * @return a pointer to this Format for chaining
216 */
217 public Format setLineSeparator(String separator) {
218 this.lineSeparator = separator;
219 return this;
220 }
221
222 /**
223 * Returns the current line separator.
224 *
225 * @return the current line separator
226 */
227 public String getLineSeparator() {
228 return lineSeparator;
229 }
230
231 /**
232 * This will set whether the XML declaration
233 * (<code><?xml version="1.0"
234 * encoding="UTF-8"?></code>)
235 * includes the encoding of the document. It is common to omit
236 * this in uses such as WML and other wireless device protocols.
237 *
238 * @param omitEncoding <code>boolean</code> indicating whether or not
239 * the XML declaration should indicate the document encoding.
240 * @return a pointer to this Format for chaining
241 */
242 public Format setOmitEncoding(boolean omitEncoding) {
243 this.omitEncoding = omitEncoding;
244 return this;
245 }
246
247 /**
248 * Returns whether the XML declaration encoding will be omitted.
249 *
250 * @return whether the XML declaration encoding will be omitted
251 */
252 public boolean getOmitEncoding() {
253 return omitEncoding;
254 }
255
256 /**
257 * This will set whether the XML declaration
258 * (<code><?xml version="1.0"?gt;</code>)
259 * will be omitted or not. It is common to omit this in uses such
260 * as SOAP and XML-RPC calls.
261 *
262 * @param omitDeclaration <code>boolean</code> indicating whether or not
263 * the XML declaration should be omitted.
264 * @return a pointer to this Format for chaining
265 */
266 public Format setOmitDeclaration(boolean omitDeclaration) {
267 this.omitDeclaration = omitDeclaration;
268 return this;
269 }
270
271 /**
272 * Returns whether the XML declaration will be omitted.
273 *
274 * @return whether the XML declaration will be omitted
275 */
276 public boolean getOmitDeclaration() {
277 return omitDeclaration;
278 }
279
280 /**
281 * This will set whether empty elements are expanded from
282 * <code><tagName/></code> to
283 * <code><tagName></tagName></code>.
284 *
285 * @param expandEmptyElements <code>boolean</code> indicating whether or not
286 * empty elements should be expanded.
287 * @return a pointer to this Format for chaining
288 */
289 public Format setExpandEmptyElements(boolean expandEmptyElements) {
290 this.expandEmptyElements = expandEmptyElements;
291 return this;
292 }
293
294 /**
295 * Returns whether empty elements are expanded.
296 *
297 * @return whether empty elements are expanded
298 */
299 public boolean getExpandEmptyElements() {
300 return expandEmptyElements;
301 }
302
303 /**
304 * This will set whether JAXP TrAX processing instructions for
305 * disabling/enabling output escaping are ignored. Disabling
306 * output escaping allows using XML text as element content and
307 * outputing it verbatim, i.e. as element children would be.
308 * <p>
309 * When processed, these processing instructions are removed from
310 * the generated XML text and control whether the element text
311 * content is output verbatim or with escaping of the pre-defined
312 * entities in XML 1.0. The text to be output verbatim shall be
313 * surrounded by the
314 * <code><?javax.xml.transform.disable-output-escaping ?></code>
315 * and <code><?javax.xml.transform.enable-output-escaping ?></code>
316 * PIs.</p>
317 * <p>
318 * When ignored, the processing instructions are present in the
319 * generated XML text and the pre-defined entities in XML 1.0 are
320 * escaped.
321 * <p>
322 * Default: <code>false</code>.</p>
323 *
324 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
325 * whether or not TrAX ouput escaping PIs are ignored.
326 *
327 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
328 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
329 */
330 public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
331 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
332 }
333
334 /**
335 * Returns whether JAXP TrAX processing instructions for
336 * disabling/enabling output escaping are ignored.
337 *
338 * @return whether or not TrAX ouput escaping PIs are ignored.
339 */
340 public boolean getIgnoreTrAXEscapingPIs() {
341 return ignoreTrAXEscapingPIs;
342 }
343
344 /**
345 * This sets the text output style. Options are available as static
346 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
347 *
348 * @return a pointer to this Format for chaining
349 */
350 public Format setTextMode(Format.TextMode mode) {
351 this.mode = mode;
352 return this;
353 }
354
355 /**
356 * Returns the current text output style.
357 *
358 * @return the current text output style
359 */
360 public Format.TextMode getTextMode() {
361 return mode;
362 }
363
364 /**
365 * This will set the indent <code>String</code> to use; this
366 * is usually a <code>String</code> of empty spaces. If you pass
367 * null, or the empty string (""), then no indentation will
368 * happen. Default: none (null)
369 *
370 * @param indent <code>String</code> to use for indentation.
371 * @return a pointer to this Format for chaining
372 */
373 public Format setIndent(String indent) {
374 // if passed the empty string, change it to null, for marginal
375 // performance gains later (can compare to null first instead
376 // of calling equals())
377 if ("".equals(indent)) {
378 indent = null;
379 }
380 this.indent = indent;
381 return this;
382 }
383
384 /**
385 * Returns the indent string in use.
386 *
387 * @return the indent string in use
388 */
389 public String getIndent() {
390 return indent;
391 }
392
393 /**
394 * Sets the output encoding. The name should be an accepted XML
395 * encoding.
396 *
397 * @param encoding the encoding format. Use XML-style names like
398 * "UTF-8" or "ISO-8859-1" or "US-ASCII"
399 * @return a pointer to this Format for chaining
400 */
401 public Format setEncoding(String encoding) {
402 this.encoding = encoding;
403 escapeStrategy = new DefaultEscapeStrategy(encoding);
404 return this;
405 }
406
407 /**
408 * Returns the configured output encoding.
409 *
410 * @return the output encoding
411 */
412 public String getEncoding() {
413 return encoding;
414 }
415
416 protected Object clone() {
417 Format format = null;
418
419 try {
420 format = (Format) super.clone();
421 }
422 catch (CloneNotSupportedException ce) {
423 }
424
425 return format;
426 }
427
428
429 /**
430 * Handle common charsets quickly and easily. Use reflection
431 * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
432 * If JDK 1.4 isn't around, default to no special encoding.
433 */
434 class DefaultEscapeStrategy implements EscapeStrategy {
435 private int bits;
436 Object encoder;
437 Method canEncode;
438
439 public DefaultEscapeStrategy(String encoding) {
440 if ("UTF-8".equalsIgnoreCase(encoding) ||
441 "UTF-16".equalsIgnoreCase(encoding)) {
442 bits = 16;
443 }
444 else if ("ISO-8859-1".equalsIgnoreCase(encoding) ||
445 "Latin1".equalsIgnoreCase(encoding)) {
446 bits = 8;
447 }
448 else if ("US-ASCII".equalsIgnoreCase(encoding) ||
449 "ASCII".equalsIgnoreCase(encoding)) {
450 bits = 7;
451 }
452 else {
453 bits = 0;
454 //encoder = Charset.forName(encoding).newEncoder();
455 try {
456 Class charsetClass = Class.forName("java.nio.charset.Charset");
457 Class encoderClass = Class.forName("java.nio.charset.CharsetEncoder");
458 Method forName = charsetClass.getMethod("forName", new Class[]{String.class});
459 Object charsetObj = forName.invoke(null, new Object[]{encoding});
460 Method newEncoder = charsetClass.getMethod("newEncoder", null);
461 encoder = newEncoder.invoke(charsetObj, null);
462 canEncode = encoderClass.getMethod("canEncode", new Class[]{char.class});
463 }
464 catch (Exception ignored) {
465 }
466 }
467 }
468
469 public boolean shouldEscape(char ch) {
470 if (bits == 16) {
471 return false;
472 }
473 if (bits == 8) {
474 if ((int) ch > 255)
475 return true;
476 else
477 return false;
478 }
479 if (bits == 7) {
480 if ((int) ch > 127)
481 return true;
482 else
483 return false;
484 }
485 else {
486 if (canEncode != null && encoder != null) {
487 try {
488 Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)});
489 return !val.booleanValue();
490 }
491 catch (Exception ignored) {
492 }
493 }
494 // Return false if we don't know. This risks not escaping
495 // things which should be escaped, but also means people won't
496 // start getting loads of unnecessary escapes.
497 return false;
498 }
499 }
500 }
501
502
503 /**
504 * Class to signify how text should be handled on output. The following
505 * table provides details.
506 *
507 * <table>
508 * <tr>
509 * <th align="left">
510 * Text Mode
511 * </th>
512 * <th>
513 * Resulting behavior.
514 * </th>
515 * </tr>
516 *
517 * <tr valign="top">
518 * <td>
519 * <i>PRESERVE (Default)</i>
520 * </td>
521 * <td>
522 * All content is printed in the format it was created, no whitespace
523 * or line separators are are added or removed.
524 * </td>
525 * </tr>
526 *
527 * <tr valign="top">
528 * <td>
529 * TRIM_FULL_WHITE
530 * </td>
531 * <td>
532 * Content between tags consisting of all whitespace is not printed.
533 * If the content contains even one non-whitespace character, it is
534 * printed verbatim, whitespace and all.
535 * </td>
536 * </tr>
537 *
538 * <tr valign="top">
539 * <td>
540 * TRIM
541 * </td>
542 * <td>
543 * Same as TrimAllWhite, plus leading/trailing whitespace are
544 * trimmed.
545 * </td>
546 * </tr>
547 *
548 * <tr valign="top">
549 * <td>
550 * NORMALIZE
551 * </td>
552 * <td>
553 * Same as TextTrim, plus addition interior whitespace is compressed
554 * to a single space.
555 * </td>
556 * </tr>
557 * </table>
558 *
559 * In most cases textual content is aligned with the surrounding tags
560 * (after the appropriate text mode is applied). In the case where the only
561 * content between the start and end tags is textual, the start tag, text,
562 * and end tag are all printed on the same line. If the document being
563 * output already has whitespace, it's wise to turn on TRIM mode so the
564 * pre-existing whitespace can be trimmed before adding new whitespace.
565 * <p>
566 * When a element has a xml:space attribute with the value of "preserve",
567 * all formating is turned off and reverts back to the default until the
568 * element and its contents have been printed. If a nested element contains
569 * another xml:space with the value "default" formatting is turned back on
570 * for the child element and then off for the remainder of the parent
571 * element.
572 */
573 public static class TextMode {
574 /**
575 * Mode for literal text preservation.
576 */
577 public static final TextMode PRESERVE = new TextMode("PRESERVE");
578
579 /**
580 * Mode for text trimming (left and right trim).
581 */
582 public static final TextMode TRIM = new TextMode("TRIM");
583
584 /**
585 * Mode for text normalization (left and right trim plus internal
586 * whitespace is normalized to a single space.
587 * @see org.jdom.Element#getTextNormalize
588 */
589 public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
590
591 /**
592 * Mode for text trimming of content consisting of nothing but
593 * whitespace but otherwise not changing output.
594 */
595 public static final TextMode TRIM_FULL_WHITE =
596 new TextMode("TRIM_FULL_WHITE");
597
598 private final String name;
599
600 private TextMode(String name) {
601 this.name = name;
602 }
603
604 public String toString() {
605 return name;
606 }
607 }
608 }