Source code: org/jdom/input/SAXBuilder.java
1 /*--
2
3 $Id: SAXBuilder.java,v 1.89 2004/09/03 18:24:28 jhunter Exp $
4
5 Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom.input;
58
59 import java.io.*;
60 import java.lang.reflect.*;
61 import java.net.*;
62 import java.util.*;
63
64 import org.jdom.*;
65
66 import org.xml.sax.*;
67 import org.xml.sax.helpers.XMLReaderFactory;
68
69 /**
70 * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
71 * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
72 * third-party SAX parser (chosen by JAXP by default, or you can choose
73 * manually) to handle the parsing duties and simply listens to the SAX events
74 * to construct a document. Details which SAX does not provide, such as
75 * whitespace outside the root element, are not represented in the JDOM
76 * document. Information about SAX can be found at <a
77 * href="http://www.saxproject.org">http://www.saxproject.org</a>.
78 * <p>
79 * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
80 * be converted by the SAX parser into absolute paths.
81 *
82 * @version $Revision: 1.89 $, $Date: 2004/09/03 18:24:28 $
83 * @author Jason Hunter
84 * @author Brett McLaughlin
85 * @author Dan Schaffer
86 * @author Philip Nelson
87 * @author Alex Rosen
88 */
89 public class SAXBuilder {
90
91 private static final String CVS_ID =
92 "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.89 $ $Date: 2004/09/03 18:24:28 $ $Name: jdom_1_0 $";
93
94 /**
95 * Default parser class to use. This is used when no other parser
96 * is given and JAXP isn't available.
97 */
98 private static final String DEFAULT_SAX_DRIVER =
99 "org.apache.xerces.parsers.SAXParser";
100
101 /** Whether validation should occur */
102 private boolean validate;
103
104 /** Whether expansion of entities should occur */
105 private boolean expand = true;
106
107 /** Adapter class to use */
108 private String saxDriverClass;
109
110 /** ErrorHandler class to use */
111 private ErrorHandler saxErrorHandler = null;
112
113 /** EntityResolver class to use */
114 private EntityResolver saxEntityResolver = null;
115
116 /** DTDHandler class to use */
117 private DTDHandler saxDTDHandler = null;
118
119 /** XMLFilter instance to use */
120 private XMLFilter saxXMLFilter = null;
121
122 /** The factory for creating new JDOM objects */
123 private JDOMFactory factory = new DefaultJDOMFactory();
124
125 /** Whether to ignore ignorable whitespace */
126 private boolean ignoringWhite = false;
127
128 /** User-specified features to be set on the SAX parser */
129 private HashMap features = new HashMap(5);
130
131 /** User-specified properties to be set on the SAX parser */
132 private HashMap properties = new HashMap(5);
133
134 /**
135 * Whether parser reuse is allowed.
136 * <p>Default: <code>true</code></p>
137 */
138 private boolean reuseParser = true;
139
140 /** The current SAX parser, if parser reuse has been activated. */
141 private XMLReader saxParser = null;
142
143 /**
144 * Creates a new SAXBuilder which will attempt to first locate
145 * a parser via JAXP, then will try to use a set of default
146 * SAX Drivers. The underlying parser will not validate.
147 */
public SAXBuilder() {
    // Non-validating builder; no driver class is chosen here.
    this(false);
}
151
152 /**
153 * Creates a new SAXBuilder which will attempt to first locate
154 * a parser via JAXP, then will try to use a set of default
155 * SAX Drivers. The underlying parser will validate or not
156 * according to the given parameter.
157 *
158 * @param validate <code>boolean</code> indicating if
159 * validation should occur.
160 */
public SAXBuilder(boolean validate) {
    // No explicit driver class: the driver name stays null.
    this(null, validate);
}
164
165 /**
166 * Creates a new SAXBuilder using the specified SAX parser.
167 * The underlying parser will not validate.
168 *
169 * @param saxDriverClass <code>String</code> name of SAX Driver
170 * to use for parsing.
171 */
public SAXBuilder(String saxDriverClass) {
    // Explicit driver, validation switched off.
    this(saxDriverClass, false);
}
175
176 /**
177 * Creates a new SAXBuilder using the specified SAX parser.
178 * The underlying parser will validate or not
179 * according to the given parameter.
180 *
181 * @param saxDriverClass <code>String</code> name of SAX Driver
182 * to use for parsing.
183 * @param validate <code>boolean</code> indicating if
184 * validation should occur.
185 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    // Both values are only recorded here; no parser is created yet.
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
190
191 /**
192 * Returns the driver class assigned in the constructor, or null if none.
193 *
194 * @return the driver class assigned in the constructor
195 */
196 public String getDriverClass() {
197 return saxDriverClass;
198 }
199
200 /**
201 * Returns the current {@link org.jdom.JDOMFactory} in use.
202 * @return the factory in use
203 */
204 public JDOMFactory getFactory() {
205 return factory;
206 }
207
208 /**
209 * This sets a custom JDOMFactory for the builder. Use this to build
210 * the tree with your own subclasses of the JDOM classes.
211 *
212 * @param factory <code>JDOMFactory</code> to use
213 */
214 public void setFactory(JDOMFactory factory) {
215 this.factory = factory;
216 }
217
218 /**
219 * Returns whether validation is to be performed during the build.
220 *
221 * @return whether validation is to be performed during the build
222 */
223 public boolean getValidation() {
224 return validate;
225 }
226
227 /**
228 * This sets validation for the builder.
229 *
230 * @param validate <code>boolean</code> indicating whether validation
231 * should occur.
232 */
233 public void setValidation(boolean validate) {
234 this.validate = validate;
235 }
236
237 /**
238 * Returns the {@link ErrorHandler} assigned, or null if none.
239 * @return the ErrorHandler assigned, or null if none
240 */
241 public ErrorHandler getErrorHandler() {
242 return saxErrorHandler;
243 }
244
245 /**
246 * This sets custom ErrorHandler for the <code>Builder</code>.
247 *
248 * @param errorHandler <code>ErrorHandler</code>
249 */
250 public void setErrorHandler(ErrorHandler errorHandler) {
251 saxErrorHandler = errorHandler;
252 }
253
254 /**
255 * Returns the {@link EntityResolver} assigned, or null if none.
256 *
257 * @return the EntityResolver assigned
258 */
259 public EntityResolver getEntityResolver() {
260 return saxEntityResolver;
261 }
262
263 /**
264 * This sets custom EntityResolver for the <code>Builder</code>.
265 *
266 * @param entityResolver <code>EntityResolver</code>
267 */
268 public void setEntityResolver(EntityResolver entityResolver) {
269 saxEntityResolver = entityResolver;
270 }
271
272 /**
273 * Returns the {@link DTDHandler} assigned, or null if none.
274 *
275 * @return the DTDHandler assigned
276 */
277 public DTDHandler getDTDHandler() {
278 return saxDTDHandler;
279 }
280
281 /**
282 * This sets custom DTDHandler for the <code>Builder</code>.
283 *
284 * @param dtdHandler <code>DTDHandler</code>
285 */
286 public void setDTDHandler(DTDHandler dtdHandler) {
287 saxDTDHandler = dtdHandler;
288 }
289
290 /**
291 * Returns the {@link XMLFilter} used during parsing, or null if none.
292 *
293 * @return the XMLFilter used during parsing
294 */
295 public XMLFilter getXMLFilter() {
296 return saxXMLFilter;
297 }
298
299 /**
300 * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
301 *
302 * @param xmlFilter the filter to use
303 */
304 public void setXMLFilter(XMLFilter xmlFilter) {
305 saxXMLFilter = xmlFilter;
306 }
307
308 /**
309 * Returns whether element content whitespace is to be ignored during the
310 * build.
311 *
312 * @return whether element content whitespace is to be ignored during the
313 * build
314 */
315 public boolean getIgnoringElementContentWhitespace() {
316 return ignoringWhite;
317 }
318
319 /**
320 * Specifies whether or not the parser should eliminate whitespace in
321 * element content (sometimes known as "ignorable whitespace") when
322 * building the document. Only whitespace which is contained within
323 * element content that has an element only content model will be
324 * eliminated (see XML Rec 3.2.1). For this setting to take effect
325 * requires that validation be turned on. The default value of this
326 * setting is <code>false</code>.
327 *
328 * @param ignoringWhite Whether to ignore ignorable whitespace
329 */
330 public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
331 this.ignoringWhite = ignoringWhite;
332 }
333
334 /**
335 * Returns whether the contained SAX parser instance is reused across
336 * multiple parses. The default is true.
337 *
338 * @return whether the contained SAX parser instance is reused across
339 * multiple parses
340 */
341 public boolean getReuseParser() {
342 return reuseParser;
343 }
344
345 /**
346 * Specifies whether this builder shall reuse the same SAX parser
347 * when performing subsequent parses or allocate a new parser for
348 * each parse. The default value of this setting is
349 * <code>true</code> (parser reuse).
350 * <p>
351 * <strong>Note</strong>: As SAX parser instances are not thread safe,
352 * the parser reuse feature should not be used with SAXBuilder instances
353 * shared among threads.</p>
354 *
355 * @param reuseParser Whether to reuse the SAX parser.
356 */
357 public void setReuseParser(boolean reuseParser) {
358 this.reuseParser = reuseParser;
359 this.saxParser = null;
360 }
361
362 /**
363 * This sets a feature on the SAX parser. See the SAX documentation for
364 * more information.
365 * <p>
366 * NOTE: SAXBuilder requires that some particular features of the SAX parser be
367 * set up in certain ways for it to work properly. The list of such features
368 * may change in the future. Therefore, the use of this method may cause
369 * parsing to break, and even if it doesn't break anything today it might
370 * break parsing in a future JDOM version, because what JDOM parsers require
371 * may change over time. Use with caution.
372 * </p>
373 *
374 * @param name The feature name, which is a fully-qualified URI.
375 * @param value The requested state of the feature (true or false).
376 */
377 public void setFeature(String name, boolean value) {
378 // Save the specified feature for later.
379 features.put(name, new Boolean(value));
380 }
381
382 /**
383 * This sets a property on the SAX parser. See the SAX documentation for
384 * more information.
385 * <p>
386 * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
387 * set up in certain ways for it to work properly. The list of such properties
388 * may change in the future. Therefore, the use of this method may cause
389 * parsing to break, and even if it doesn't break anything today it might
390 * break parsing in a future JDOM version, because what JDOM parsers require
391 * may change over time. Use with caution.
392 * </p>
393 *
394 * @param name The property name, which is a fully-qualified URI.
395 * @param value The requested value for the property.
396 */
397 public void setProperty(String name, Object value) {
398 // Save the specified property for later.
399 properties.put(name, value);
400 }
401
402 /**
403 * This builds a document from the supplied
404 * input source.
405 *
406 * @param in <code>InputSource</code> to read from
407 * @return <code>Document</code> resultant Document object
408 * @throws JDOMException when errors occur in parsing
409 * @throws IOException when an I/O error prevents a document
410 * from being fully parsed
411 */
412 public Document build(InputSource in)
413 throws JDOMException, IOException {
414 SAXHandler contentHandler = null;
415
416 try {
417 // Create and configure the content handler.
418 contentHandler = createContentHandler();
419 configureContentHandler(contentHandler);
420
421 XMLReader parser = this.saxParser;
422 if (parser == null) {
423 // Create and configure the parser.
424 parser = createParser();
425
426 // Install optional filter
427 if (saxXMLFilter != null) {
428 // Connect filter chain to parser
429 XMLFilter root = saxXMLFilter;
430 while (root.getParent() instanceof XMLFilter) {
431 root = (XMLFilter)root.getParent();
432 }
433 root.setParent(parser);
434
435 // Read from filter
436 parser = saxXMLFilter;
437 }
438
439 // Configure parser
440 configureParser(parser, contentHandler);
441
442 if (reuseParser == true) {
443 this.saxParser = parser;
444 }
445 }
446 else {
447 // Reset content handler as SAXHandler instances cannot
448 // be reused
449 configureParser(parser, contentHandler);
450 }
451
452 // Parse the document.
453 parser.parse(in);
454
455 return contentHandler.getDocument();
456 }
457 catch (SAXParseException e) {
458 Document doc = contentHandler.getDocument();
459 if (doc.hasRootElement() == false) {
460 doc = null;
461 }
462
463 String systemId = e.getSystemId();
464 if (systemId != null) {
465 throw new JDOMParseException("Error on line " +
466 e.getLineNumber() + " of document " + systemId, e, doc);
467 } else {
468 throw new JDOMParseException("Error on line " +
469 e.getLineNumber(), e, doc);
470 }
471 }
472 catch (SAXException e) {
473 throw new JDOMParseException("Error in building: " +
474 e.getMessage(), e, contentHandler.getDocument());
475 }
476 finally {
477 // Explicitly nullify the handler to encourage GC
478 // It's a stack var so this shouldn't be necessary, but it
479 // seems to help on some JVMs
480 contentHandler = null;
481 }
482 }
483
484 /**
485 * This creates the SAXHandler that will be used to build the Document.
486 *
487 * @return <code>SAXHandler</code> - resultant SAXHandler object.
488 */
489 protected SAXHandler createContentHandler() {
490 SAXHandler contentHandler = new SAXHandler(factory);
491 return contentHandler;
492 }
493
494 /**
495 * This configures the SAXHandler that will be used to build the Document.
496 * <p>
497 * The default implementation simply passes through some configuration
498 * settings that were set on the SAXBuilder: setExpandEntities() and
499 * setIgnoringElementContentWhitespace().
500 * </p>
501 */
502 protected void configureContentHandler(SAXHandler contentHandler) {
503 // Setup pass through behavior
504 contentHandler.setExpandEntities(expand);
505 contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
506 }
507
508 /**
509 * This creates the XMLReader to be used for reading the XML document.
510 * <p>
511 * The default behavior is to (1) use the saxDriverClass, if it has been
512 * set, (2) try to obtain a parser from JAXP, if it is available, and
513 * (3) if all else fails, use a hard-coded default parser (currently
514 * the Xerces parser). Subclasses may override this method to determine
515 * the parser to use in a different way.
516 * </p>
517 *
518 * @return <code>XMLReader</code> - resultant XMLReader object.
519 */
520 protected XMLReader createParser() throws JDOMException {
521 XMLReader parser = null;
522 if (saxDriverClass != null) {
523 // The user knows that they want to use a particular class
524 try {
525 parser = XMLReaderFactory.createXMLReader(saxDriverClass);
526
527 // Configure parser
528 setFeaturesAndProperties(parser, true);
529 }
530 catch (SAXException e) {
531 throw new JDOMException("Could not load " + saxDriverClass, e);
532 }
533 } else {
534 // Try using JAXP...
535 // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
536 // available then the getXMLReader call fails and we skip
537 // to the hard coded default parser
538 try {
539 // Get factory class and method.
540 Class factoryClass =
541 Class.forName("org.jdom.input.JAXPParserFactory");
542
543 Method createParser =
544 factoryClass.getMethod("createParser",
545 new Class[] { boolean.class, Map.class, Map.class });
546
547 // Create SAX parser.
548 parser = (XMLReader)createParser.invoke(null,
549 new Object[] { new Boolean(validate),
550 features, properties });
551
552 // Configure parser
553 setFeaturesAndProperties(parser, false);
554 }
555 catch (JDOMException e) {
556 throw e;
557 }
558 catch (NoClassDefFoundError e) {
559 // The class loader failed to resolve the dependencies
560 // of org.jdom.input.JAXPParserFactory. This probably means
561 // that no JAXP parser is present in its class path.
562 // => Ignore and try allocating default SAX parser instance.
563 }
564 catch (Exception e) {
565 // Ignore and try allocating default SAX parser instance.
566 }
567 }
568
569 // Check to see if we got a parser yet, if not, try to use a
570 // hard coded default
571 if (parser == null) {
572 try {
573 parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
574 // System.out.println("using default " + DEFAULT_SAX_DRIVER);
575 saxDriverClass = parser.getClass().getName();
576
577 // Configure parser
578 setFeaturesAndProperties(parser, true);
579 }
580 catch (SAXException e) {
581 throw new JDOMException("Could not load default SAX parser: "
582 + DEFAULT_SAX_DRIVER, e);
583 }
584 }
585
586 return parser;
587 }
588
589 /**
590 * This configures the XMLReader to be used for reading the XML document.
591 * <p>
592 * The default implementation sets various options on the given XMLReader,
593 * such as validation, DTD resolution, entity handlers, etc., according
594 * to the options that were set (e.g. via <code>setEntityResolver</code>)
595 * and set various SAX properties and features that are required for JDOM
596 * internals. These features may change in future releases, so change this
597 * behavior at your own risk.
598 * </p>
599 */
600 protected void configureParser(XMLReader parser, SAXHandler contentHandler)
601 throws JDOMException {
602
603 // Setup SAX handlers.
604
605 parser.setContentHandler(contentHandler);
606
607 if (saxEntityResolver != null) {
608 parser.setEntityResolver(saxEntityResolver);
609 }
610
611 if (saxDTDHandler != null) {
612 parser.setDTDHandler(saxDTDHandler);
613 } else {
614 parser.setDTDHandler(contentHandler);
615 }
616
617 if (saxErrorHandler != null) {
618 parser.setErrorHandler(saxErrorHandler);
619 } else {
620 parser.setErrorHandler(new BuilderErrorHandler());
621 }
622
623 // Setup lexical reporting.
624 boolean lexicalReporting = false;
625 try {
626 parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
627 contentHandler);
628 lexicalReporting = true;
629 } catch (SAXNotSupportedException e) {
630 // No lexical reporting available
631 } catch (SAXNotRecognizedException e) {
632 // No lexical reporting available
633 }
634
635 // Some parsers use alternate property for lexical handling (grr...)
636 if (!lexicalReporting) {
637 try {
638 parser.setProperty(
639 "http://xml.org/sax/properties/lexical-handler",
640 contentHandler);
641 lexicalReporting = true;
642 } catch (SAXNotSupportedException e) {
643 // No lexical reporting available
644 } catch (SAXNotRecognizedException e) {
645 // No lexical reporting available
646 }
647 }
648
649 // Try setting the DeclHandler if entity expansion is off
650 if (!expand) {
651 try {
652 parser.setProperty(
653 "http://xml.org/sax/properties/declaration-handler",
654 contentHandler);
655 } catch (SAXNotSupportedException e) {
656 // No lexical reporting available
657 } catch (SAXNotRecognizedException e) {
658 // No lexical reporting available
659 }
660 }
661 }
662
663 private void setFeaturesAndProperties(XMLReader parser,
664 boolean coreFeatures)
665 throws JDOMException {
666 // Set any user-specified features on the parser.
667 Iterator iter = features.keySet().iterator();
668 while (iter.hasNext()) {
669 String name = (String)iter.next();
670 Boolean value = (Boolean)features.get(name);
671 internalSetFeature(parser, name, value.booleanValue(), name);
672 }
673
674 // Set any user-specified properties on the parser.
675 iter = properties.keySet().iterator();
676 while (iter.hasNext()) {
677 String name = (String)iter.next();
678 internalSetProperty(parser, name, properties.get(name), name);
679 }
680
681 if (coreFeatures) {
682 // Set validation.
683 try {
684 internalSetFeature(parser,
685 "http://xml.org/sax/features/validation",
686 validate, "Validation");
687 } catch (JDOMException e) {
688 // If validation is not supported, and the user is requesting
689 // that we don't validate, that's fine - don't throw an
690 // exception.
691 if (validate)
692 throw e;
693 }
694
695 // Setup some namespace features.
696 internalSetFeature(parser,
697 "http://xml.org/sax/features/namespaces",
698 true, "Namespaces");
699 internalSetFeature(parser,
700 "http://xml.org/sax/features/namespace-prefixes",
701 true, "Namespace prefixes");
702 }
703
704 // Set entity expansion
705 // Note SAXHandler can work regardless of how this is set, but when
706 // entity expansion it's worth it to try to tell the parser not to
707 // even bother with external general entities.
708 // Apparently no parsers yet support this feature.
709 // XXX It might make sense to setEntityResolver() with a resolver
710 // that simply ignores external general entities
711 try {
712 if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
713 parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
714 }
715 }
716 catch (SAXNotRecognizedException e) { /* Ignore... */ }
717 catch (SAXNotSupportedException e) { /* Ignore... */ }
718 }
719
720 /**
721 * Tries to set a feature on the parser. If the feature cannot be set,
722 * throws a JDOMException describing the problem.
723 */
724 private void internalSetFeature(XMLReader parser, String feature,
725 boolean value, String displayName) throws JDOMException {
726 try {
727 parser.setFeature(feature, value);
728 } catch (SAXNotSupportedException e) {
729 throw new JDOMException(
730 displayName + " feature not supported for SAX driver " + parser.getClass().getName());
731 } catch (SAXNotRecognizedException e) {
732 throw new JDOMException(
733 displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
734 }
735 }
736
737 /**
738 * <p>
739 * Tries to set a property on the parser. If the property cannot be set,
740 * throws a JDOMException describing the problem.
741 * </p>
742 */
743 private void internalSetProperty(XMLReader parser, String property,
744 Object value, String displayName) throws JDOMException {
745 try {
746 parser.setProperty(property, value);
747 } catch (SAXNotSupportedException e) {
748 throw new JDOMException(
749 displayName + " property not supported for SAX driver " + parser.getClass().getName());
750 } catch (SAXNotRecognizedException e) {
751 throw new JDOMException(
752 displayName + " property not recognized for SAX driver " + parser.getClass().getName());
753 }
754 }
755
756 /**
757 * <p>
758 * This builds a document from the supplied
759 * input stream.
760 * </p>
761 *
762 * @param in <code>InputStream</code> to read from
763 * @return <code>Document</code> resultant Document object
764 * @throws JDOMException when errors occur in parsing
765 * @throws IOException when an I/O error prevents a document
766 * from being fully parsed.
767 */
768 public Document build(InputStream in)
769 throws JDOMException, IOException {
770 return build(new InputSource(in));
771 }
772
773 /**
774 * <p>
775 * This builds a document from the supplied
776 * filename.
777 * </p>
778 *
779 * @param file <code>File</code> to read from
780 * @return <code>Document</code> resultant Document object
781 * @throws JDOMException when errors occur in parsing
782 * @throws IOException when an I/O error prevents a document
783 * from being fully parsed
784 */
785 public Document build(File file)
786 throws JDOMException, IOException {
787 try {
788 URL url = fileToURL(file);
789 return build(url);
790 } catch (MalformedURLException e) {
791 throw new JDOMException("Error in building", e);
792 }
793 }
794
795 /**
796 * <p>
797 * This builds a document from the supplied
798 * URL.
799 * </p>
800 *
801 * @param url <code>URL</code> to read from.
802 * @return <code>Document</code> - resultant Document object.
803 * @throws JDOMException when errors occur in parsing
804 * @throws IOException when an I/O error prevents a document
805 * from being fully parsed.
806 */
807 public Document build(URL url)
808 throws JDOMException, IOException {
809 String systemID = url.toExternalForm();
810 return build(new InputSource(systemID));
811 }
812
813 /**
814 * <p>
815 * This builds a document from the supplied
816 * input stream.
817 * </p>
818 *
819 * @param in <code>InputStream</code> to read from.
820 * @param systemId base for resolving relative URIs
821 * @return <code>Document</code> resultant Document object
822 * @throws JDOMException when errors occur in parsing
823 * @throws IOException when an I/O error prevents a document
824 * from being fully parsed
825 */
826 public Document build(InputStream in, String systemId)
827 throws JDOMException, IOException {
828
829 InputSource src = new InputSource(in);
830 src.setSystemId(systemId);
831 return build(src);
832 }
833
834 /**
835 * <p>
836 * This builds a document from the supplied
837 * Reader. It's the programmer's responsibility to make sure
838 * the reader matches the encoding of the file. It's often easier
839 * and safer to use an InputStream rather than a Reader, and to let the
840 * parser auto-detect the encoding from the XML declaration.
841 * </p>
842 *
843 * @param characterStream <code>Reader</code> to read from
844 * @return <code>Document</code> resultant Document object
845 * @throws JDOMException when errors occur in parsing
846 * @throws IOException when an I/O error prevents a document
847 * from being fully parsed
848 */
849 public Document build(Reader characterStream)
850 throws JDOMException, IOException {
851 return build(new InputSource(characterStream));
852 }
853
854 /**
855 * <p>
856 * This builds a document from the supplied
857 * Reader. It's the programmer's responsibility to make sure
858 * the reader matches the encoding of the file. It's often easier
859 * and safer to use an InputStream rather than a Reader, and to let the
860 * parser auto-detect the encoding from the XML declaration.
861 * </p>
862 *
863 * @param characterStream <code>Reader</code> to read from.
864 * @param systemId base for resolving relative URIs
865 * @return <code>Document</code> resultant Document object
866 * @throws JDOMException when errors occur in parsing
867 * @throws IOException when an I/O error prevents a document
868 * from being fully parsed
869 */
870 public Document build(Reader characterStream, String systemId)
871 throws JDOMException, IOException {
872
873 InputSource src = new InputSource(characterStream);
874 src.setSystemId(systemId);
875 return build(src);
876 }
877
878 /**
879 * <p>
880 * This builds a document from the supplied
881 * URI.
882 * </p>
883 * @param systemId URI for the input
884 * @return <code>Document</code> resultant Document object
885 * @throws JDOMException when errors occur in parsing
886 * @throws IOException when an I/O error prevents a document
887 * from being fully parsed
888 */
889 public Document build(String systemId)
890 throws JDOMException, IOException {
891 return build(new InputSource(systemId));
892 }
893
894 // /**
895 // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
896 // * here to work with JDK 1.1.
897 // *
898 // * @see java.io.File
899 // *
900 // * @param f the file to convert
901 // * @return the file path converted to a file: URL
902 // */
903 // protected URL fileToURL(File f) throws MalformedURLException {
904 // String path = f.getAbsolutePath();
905 // if (File.separatorChar != '/') {
906 // path = path.replace(File.separatorChar, '/');
907 // }
908 // if (!path.startsWith("/")) {
909 // path = "/" + path;
910 // }
911 // if (!path.endsWith("/") && f.isDirectory()) {
912 // path = path + "/";
913 // }
914 // return new URL("file", "", path);
915 // }
916
917 /** Custom File.toUrl() implementation to handle special chars in file names
918 *
919 * @param file file object whose path will be converted
920 * @return URL form of the file, with special characters handled
921 * @throws MalformedURLException if there's a problem constructing a URL
922 */
923 private static URL fileToURL(File file) throws MalformedURLException {
924 StringBuffer buffer = new StringBuffer();
925 String path = file.getAbsolutePath();
926
927 // Convert non-URL style file separators
928 if (File.separatorChar != '/') {
929 path = path.replace(File.separatorChar, '/');
930 }
931
932 // Make sure it starts at root
933 if (!path.startsWith("/")) {
934 buffer.append('/');
935 }
936
937 // Copy, converting URL special characters as we go
938 int len = path.length();
939 for (int i = 0; i < len; i++) {
940 char c = path.charAt(i);
941 if (c == ' ')
942 buffer.append("%20");
943 else if (c == '#')
944 buffer.append("%23");
945 else if (c == '%')
946 buffer.append("%25");
947 else if (c == '&')
948 buffer.append("%26");
949 else if (c == ';')
950 buffer.append("%3B");
951 else if (c == '<')
952 buffer.append("%3C");
953 else if (c == '=')
954 buffer.append("%3D");
955 else if (c == '>')
956 buffer.append("%3E");
957 else if (c == '?')
958 buffer.append("%3F");
959 else if (c == '~')
960 buffer.append("%7E");
961 else
962 buffer.append(c);
963 }
964
965 // Make sure directories end with slash
966 if (!path.endsWith("/") && file.isDirectory()) {
967 buffer.append('/');
968 }
969
970 // Return URL
971 return new URL("file", "", buffer.toString());
972 }
973
974 /**
975 * Returns whether or not entities are being expanded into normal text
976 * content.
977 *
978 * @return whether entities are being expanded
979 */
980 public boolean getExpandEntities() {
981 return expand;
982 }
983
984 /**
985 * <p>
986 * This sets whether or not to expand entities for the builder.
987 * A true means to expand entities as normal content. A false means to
988 * leave entities unexpanded as <code>EntityRef</code> objects. The
989 * default is true.
990 * </p>
991 * <p>
992 * When this setting is false, the internal DTD subset is retained; when
993 * this setting is true, the internal DTD subset is not retained.
994 * </p>
995 * <p>
996 * Note that Xerces (at least up to 1.4.4) has a bug where entities
997 * in attribute values will be misreported if this flag is turned off,
998 * resulting in entities to appear within element content. When turning
999 * entity expansion off either avoid entities in attribute values, or
1000 * use another parser like Crimson.
1001 * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
1002 * </p>
1003 *
1004 * @param expand <code>boolean</code> indicating whether entity expansion
1005 * should occur.
1006 */
1007 public void setExpandEntities(boolean expand) {
1008 this.expand = expand;
1009 }
1010}