Source code: com/thermidor/xml/SAXParserBase.java
1 package com.thermidor.xml;
2 /*@LEGAL@*/
3 import org.xml.sax.SAXException;
4 import org.xml.sax.SAXParseException;
5 import org.xml.sax.helpers.DefaultHandler;
6 import org.xml.sax.helpers.AttributesImpl;
7 import java.util.LinkedList;
8 import java.util.NoSuchElementException;
9 import org.xml.sax.Locator;
10 import org.xml.sax.Attributes;
11 import org.xml.sax.InputSource;
12 import org.xml.sax.EntityResolver;
13 import org.xml.sax.XMLReader;
14 import org.xml.sax.XMLFilter;
15 import java.util.Hashtable;
16 import java.io.IOException;
17
18 /**
 * The SAXParserBase class is provided as a framework for the
 * creation of XML content handlers. The framework captures some of the
 * simplicity of using DOM while maintaining much of the power and efficiency
 * of the SAX API. The primary arena for the use of the SAXParserBase as a
 * framework for XML processing is where XML documents have to be converted
 * to in-memory models. To put this another way, the XML is really an
 * externalization of an 'object model'.
26
 * <p>Core to the conception of the SAXParserBase is the SAXElement class. This
 * class is in many ways superficially similar to the DOM node. One of the
 * recurring issues with SAX programming is the maintenance of context while
 * parsing the document. The SAXParserBase does this by maintaining a stack
 * of SAXElements, where each SAXElement instance corresponds to an
 * encountered tag. When a tag is encountered it is pushed on to the stack
 * and passed to the handleStartElement operation. As this context
 * information is maintained on the stack, the end element can be retrieved from
 * the top of the stack when the SAX event is received. At this time the
 * element is popped from the stack and passed to the handleEndElement
 * operation. Any PCDATA that is found within the immediate context
 * of the tag is added to that element. There are a number of rules and
 * constraints that apply to the lifecycle of SAXElements as they are
 * dispatched to the handler operations. In the handleStartElement operation
 * none of the PCDATA for a tag is available, as the SAX event that corresponds
 * to it has not yet been received by the SAXParserBase. When the end tag is
 * encountered all attributes and PCDATA for the tag are available. This is
 * very different from the basic SAX API and is of considerable utility.</p>
 * <p>Virtually all content handlers that use the SAXParserBase as their base
 * class follow a similar pattern. The following example illustrates this.
 * It is common that the implementation of a SAXParserBase based content
 * handler consists of two classes: an abstract parser and its concrete
 * implementation. The abstract parser extends the SAXParserBase to provide a
 * token table and the implementation of the handleStartElement and
 * handleEndElement operations. These two operations are dispatch functions
 * that associate a lookup in the token table with an operation to handle
 * either the start or end of that element. In many ways each abstract parser
 * is a specialization of the SAX parser paradigm that provides fine grain
 * events for the start and end of tags for specific schemas or DTDs. In an
 * abstract parser it is usual to define start and end events for all of the
 * elements in the DTD or schema, although it is not always necessary to
 * provide real implementations for all events.</p>
59 <p>For a detailed example of the implementation see the
60 {@link package-summary.html#package_description package documentation}</p>
61 @author Edward Turnock
62 @version 1.0
63 */
64 public abstract class SAXParserBase
65 extends DefaultHandler {
66 /**
67 * Set up the log4j category.
68 */
69 // private static final Category CAT;
70 // static{
71 // CAT = Category.getInstance( SAXParserBase.class.getName() );
72 // }
73
74 /**
75 The purpose of the SAXElement class is to provide an aggregation of the
76 results if the SAX content handler event and a mechanism to manage the
77 context state of tags as they are encountered.
78 @author Edward Turnock
79 */
80 public static class SAXElement {
81
82 /**
83 The characters StringBuffer is used to maintain any PCDATA that is
84 encountered in the immediate context of this tag.
85 */
86 private StringBuffer characters;
87
88 /**
89 The uri attribute maintains the namespace uri of the namespace that
90 the lement belongs to. The semantics of this are the same as those
91 deFined for the SAX API.
92 */
93 private String uri;
94
95 /**
96 The qaulified name of the Element. The semantics of this are the
97 same as those deifned for the SAX API.
98 */
99 private String qname;
100
101 /**
102 The local name of the elemt without any namespace prefix.
103 */
104 private String lname;
105
106 /**
107 The attributes associated with the element.
108 */
109 private Attributes attributes;
110
111
112 private Hashtable data=new Hashtable();
113
114
115 public Object putExt(Object key, Object value) {
116 return data.put(key,value);
117 }
118 public Object getExt(Object key) {
119 return data.get(key);
120 }
121
122 private SAXElement parent=null;
123
124 public SAXElement getParent() {
125 return parent;
126 }
127 public void setParent(SAXElement _parent) {
128 parent = _parent;
129 }
130 /**
131 Construct a new SAXElement instance with the specified parameters.
132 The parameters with which a SAXElement are constraucted are the
133 parameters of the SAX contenthandler startELement operation.
134 @param uri the namespace URI of the element
135 @param lname the local name component of the elements name.
136 @param qname the namespace qualified name of the element.
137 @param attributes the attributes associated with the element.
138 */
139 public SAXElement( String uri,
140 String lname,
141 String qname,
142 Attributes attributes ) {
143 this.uri = uri;
144 this.lname = lname;
145 this.qname = qname;
146 this.attributes = attributes;
147 }
148
149 /**
150 Return the text, PCDATA, that is associated with this element. The
151 text property is only really valid when the end tag has been reached
152 @return the PCDATA that is immediately enclosed by this tag.
153 */
154 public String getText() {
155 String retval = null;
156
157 if ( characters != null ) {
158 retval = characters.toString();
159 }
160
161 return retval;
162 }
163
164 /**
165 * Append the specified buffer region to the PCDAT maintained by the
166 * element.. The parameters of this operation are the same as those of
167 * the characters operation on the SAX content handler.
168 * @param ch the char buffer that contains the region of interest/
169 * @param start where in the buffer the region starts.
170 * @param length how much of the buffer is relevant.
171 */
172 public void appendText( char[] ch,
173 int start,
174 int length ) {
175 if ( characters == null ) {
176 characters = new StringBuffer();
177 }
178
179 characters.append( ch, start, length );
180 }
181
182 /**
183 * Return the namespace URI of this element.
184 * @return the namespace URI.
185 */
186 public String getUri() {
187 return uri;
188 }
189
190 /**
191 * Retrieve the prefx qualified name of the element.
192 * @return the fully qualified name of the Element.
193 */
194 public String getQname() {
195 return qname;
196 }
197
198 /**
199 * Retrieve the local name part of the elements name.
200 * @return the local nameof the element.
201 */
202 public String getLname() {
203 return lname;
204 }
205
206 /**
207 * Retrieve the value of the named attribute as a String.
208 * This operation currently assumes that no namespaces are being used.
209 * @param localName the localName of the attribute to retreive.
210 * @return the value of the attribute required or null of the
211 * attribute was not part of the element.
212 */
213 public String getValue( String localName ) {
214 return attributes.getValue( "", localName );
215 }
216
217 /**
218 * This operation allows the name space uri to be used
219 * @param uri the namespace uri of the attribute
220 * @param localName the localName of the attribute to retreive.
221 * @return the value of the attribute required or null of the
222 * attribute was not part of the element.
223 */
224 public String getValue( String uri, String localName ) {
225 return attributes.getValue( uri, localName );
226 }
227
228 /**
229 * Retrieve the attributes associated with the element.
230 * @return the attributes.
231 */
232 public Attributes getAttributes() {
233 return attributes;
234 }
235
236 /**
237 * Return a simplified straing represntation of this element.
238 * @return the sprering representation of this element.
239 */
240 public String toString() {
241 StringBuffer sb = new StringBuffer();
242 int len = attributes.getLength();
243
244 for ( int i = 0; i < len; i++ ) {
245 sb.append( " " );
246 sb.append( attributes.getQName( i ) );
247 sb.append( "=\"" );
248 sb.append( SAXWriter.normalize( attributes.getValue( i ) ) );
249 sb.append( '"' );
250 }
251
252
253 return "<" + lname + " "+sb.toString()+">";
254 }
255 }
256
257 /**
258 * The reader associated with this ContentHandler, if any. Used for
259 * chaining XMLFilters.
260 */
261 protected XMLReader reader = null;
262 /**
263 * Get the value of reader.
264 * @return value of reader.
265 */
266 public XMLReader getReader() {
267 return reader;
268 }
269
270 /**
271 * Set the value of reader.
272 * @param v Value to assign to reader.
273 */
274 public void setReader( XMLReader v ) {
275 if ( reader != null && reader instanceof XMLFilter ) {
276 ( ( XMLFilter ) reader ).setParent( v );
277
278 }
279 else {
280 this.reader = v;
281 }
282 }
283
/**
 * Default constructor. It performs no initialisation of its own.
 * An earlier version installed a {@link SAXWriter} as the reader to
 * pretty print the parsed XML to System.out when debug logging was
 * enabled; that set-up is disabled.
 */
protected SAXParserBase() {
    // Nothing to do: the debug SAXWriter hook is disabled.
}
297
298 /**
299 The stack of SAXelement instances that the SAXParserBase uses to
300 maintain consttext within the event stream from the SAX parser.
301 */
302 private LinkedList contextStack = new LinkedList();
303 /**
304 * Return the depth in the parse.
305 * @return the parse depth.
306 */
307 protected int depth() {
308 return contextStack.size();
309 }
310 /**
311 * This utility operation is used within the SAXParserBAse to append
312 * character data to the top element of the stack.
313 * @param ch the char buffer that contains the region of interest.
314 * @param start where in the buffer the region starts.
315 * @param length how much of the buffer is relevant.
316 * @throws NoSuchElementException if there are no elements left in
317 * the context this condition would indicate a major violation in the
318 * state of the parser.
319 */
320 private void appendText( char[] ch, int start, int length )
321 throws NoSuchElementException {
322 ( ( SAXElement ) contextStack.getFirst() ).appendText( ch,
323 start,
324 length );
325 }
326
327 /**
328 * Pop the top SAXelement of the context stack.
329 * @return the top element of the stack, a closing tag.
330 * @throws NoSuchElementException if the SAX is empty. This should only
331 * occur in XML documents that are not well formed.
332 */
333 private SAXElement pop()
334 throws NoSuchElementException {
335 return ( SAXElement ) contextStack.removeFirst();
336 }
337
338 /**
339 * Push the specified SAXElement instance onto the top of the context
340 * stack.
341 * @param element the element to record the context of.
342 */
343 private void push( SAXElement element ) {
344 contextStack.addFirst( element );
345 }
346
347 /**
348 The deleate eneity resolve to be used. Optional.
349 */
350 private EntityResolver delegateResolver = null;
351
352 /**
353 * Set the delegate enityt resolver that will be used to resolve entities
354 * @param er the delegate entity resolver
355 */
356 public void setDelegateEntityResolver( EntityResolver er ) {
357 delegateResolver = er;
358 }
359
360 /**
361 * The reolveEntity operation overrides that resolveEntity operation on
362 * DefaultHandler. The implementation of the operation delegates the
363 * delegateEntityResolver if one is defined otherwise it delegates to the
364 * super class implemetation of the operation.
365 * @param publicId the public identifier of the entity to resolve.
366 * @param systemId the system identifier of the entity to resolve.
367 * @return the InputSource that encapsulates access to the entity.
368 * @exception SAXException if an error occurs resolving the entity
369 * @see org.xml.sax.helpers.DefaultHandler#resolveEntity
370 */
371 public InputSource resolveEntity( String publicId,
372 String systemId )
373 throws SAXException {
374 InputSource retval = null;
375
376 if ( delegateResolver != null ) {
377 try {
378 retval = delegateResolver.resolveEntity( publicId, systemId );
379
380 } catch ( IOException ioe ) {
381 throw new SAXException( ioe );
382 }
383 }
384
385 if ( retval != null ) {
386 return retval;
387 }
388
389 return super.resolveEntity( publicId, systemId );
390 }
391
392 /**
393 * Overide of the notationDecl_ operation in the DefaultHandler
394 * @param name a <code>String</code> value for the name of the
395 * notation declaration
396 * @param publicId a <code>String</code> value representing the public
397 * id of the notation
398 * @param systemId a <code>String</code> value for the system id of the
399 * notation
400 * @exception SAXException if an error occurs
401 * @see org.xml.sax.helpers.DefaultHandler
402 */
403 public void notationDecl( String name,
404 String publicId,
405 String systemId )
406 throws SAXException {
407 ;
408 }
409
410 /**
411 * Overide of the unparsedEntityDecl operation in the DefaultHandler
412 * @param name a <code>String</code> value for the name of the entity
413 * @param publicId a <code>String</code> value representing the public id
414 * of the entity
415 * @param systemId a <code>String</code> value representing the system id
416 * of the entity
417 * @param notationName a <code>String</code> identifying the naotation
418 * that the entity belongs to
419 * @exception SAXException is raised if the declaration could not be
420 * handled
421 * @see org.xml.sax.helpers.DefaultHandler
422 */
423 public void unparsedEntityDecl( String name,
424 String publicId,
425 String systemId,
426 String notationName )
427 throws SAXException {
428 ;
429 }
430
431 /**
432 * Overide of the setDocumentLocator operation in the DefaultHandler
433 * @param locator the locator object set by the SAX parser to indicate
434 * where an error occured
435 * @see org.xml.sax.helpers.DefaultHandler
436 */
437 public void setDocumentLocator( Locator locator ) {
438 ;
439 }
440
441 /**
442 * Overide of the startDocument operation in the DefaultHandler
443 * @exception SAXException is raised if start of the document could not be
444 * handled
445 * @see org.xml.sax.helpers.DefaultHandler
446 */
447 public void startDocument()
448 throws SAXException {
449 ;
450 }
451
452 /**
453 * Overide of the endDocument operation in the DefaultHandler
454 * @exception SAXException is raised if the end of the document could not
455 * be handled.
456 * @see org.xml.sax.helpers.DefaultHandler
457 */
458 public void endDocument()
459 throws SAXException {
460 ;
461 }
462
463 /**
464 * Overide of the startPrefixMapping operation in the DefaultHandler
465 * @param prefix the prefix that corresponds to the uri .
466 * @param uri the uri that corresponds to the namespace prefix
467 * @exception SAXException if an error occurs
468 * @see org.xml.sax.helpers.DefaultHandler
469 */
470 public void startPrefixMapping( String prefix,
471 String uri )
472 throws SAXException {
473 ;
474 }
475
476 /**
477 * Overide of the endPrefixMapping operation in the DefaultHandler
478 * @param prefix the namespace prefix
479 * @exception SAXException is raised if the end of the prefix mapping
480 * could not be handled.
481 * @see org.xml.sax.helpers.DefaultHandler
482 */
483 public void endPrefixMapping( String prefix )
484 throws SAXException {
485 ;
486 }
487 /**
488 * Overide of the startElement operation in the DefaultHandler
489 * @param namespaceURI the namespace uri of the element
490 * @param localName the local name of the element
491 * @param qname the prefix qualified name of the element
492 * @param atts the attributes of the element
493 * @exception SAXException if an error occurs
494 * @see org.xml.sax.helpers.DefaultHandler
495 */
496 public void startElement( String namespaceURI,
497 String localName,
498 String qname,
499 Attributes atts )
500 throws SAXException {
501 SAXElement element = new SAXElement( namespaceURI,
502 localName,
503 qname,
504 new AttributesImpl(atts) );
505
506
507 if(contextStack.size()>0) {
508 element.setParent(( SAXElement ) contextStack.getFirst());
509 }
510
511 push( element );
512 handleStartElement( element );
513 }
514
515 /**
516 * Overide of the endElement operation in the DefaultHandler
517 * @param uri the namespace uri of the element
518 * @param lname the local name of the element
519 * @param qname the prefix qualified name of the element
520 * @exception org.xml.sax.SAXException is raised if the end element
521 * could not be handled
522 * @see org.xml.sax.helpers.DefaultHandler
523 */
524 public void endElement( String uri, String lname, String qname )
525 throws org.xml.sax.SAXException {
526 SAXElement element = pop();
527 handleEndElement( element );
528 }
529
530 /**
531 * The characters operation is called by the SAX parsers to pass
532 * character data to the content handler. In this case the operation
533 * appends the test to the SAXElement that represents the current
534 * parsing context.
535 * @param ch the character buffer that contains the data
536 * @param start the position within the buffer that the relevent
537 * data strats at
538 * @param length the offset from start indicating the number of
539 * valid characters in the array.
540 * @throws SAXException if the parsing context was invalid.
541 */
542 public void characters( char[] ch,
543 int start,
544 int length )
545 throws SAXException {
546 try {
547 appendText( ch, start, length );
548
549 } catch ( NoSuchElementException nsee ) {
550 throw new SAXException( "Context Stack in an Invalid State" );
551 }
552 }
553
554 /**
555 * Overide of the ignorableWhitespace operation in the DefaultHandler
556 * @param ch the character buffer that contains the data
557 * @param start the position within the buffer that the relevent
558 * data strats at
559 * @param len the offset from start indicating the number of
560 * valid characters in the array.
561 * @see org.xml.sax.helpers.DefaultHandler
562 * @throws SAXException if the whitespae could not be processed.
563 */
564 public void ignorableWhitespace( char[] ch,
565 int start,
566 int len )
567 throws SAXException {
568 ;
569 }
570
571 /**
572 * Overide of the processingInstruction operation in the DefaultHandler
573 * @param target the identifier of the processing instruction
574 * @param data any data associated with the processing instruction
575 * @exception SAXException if an error occurs
576 * @see org.xml.sax.helpers.DefaultHandler
577 */
578 public void processingInstruction( String target, String data )
579 throws SAXException {
580 ;
581 }
582
583 /**
584 * Overide of the skippedEntity operation in the DefaultHandler
585 * @param name the name of the entity that was skipped
586 * @exception SAXException is raised if the skipping the entity caused a
587 * problem
588 * @see org.xml.sax.helpers.DefaultHandler
589 */
590 public void skippedEntity( String name )
591 throws SAXException {
592 ;
593 }
594
595 /**
596 * Overide of the warning operation in the DefaultHandler
597 * @param warning a <code>SAXParseException</code> value
598 * @exception SAXException is raised if the warning exception could not be
599 * adequately handled
600 * @see org.xml.sax.helpers.DefaultHandler
601 */
602 public void warning( SAXParseException warning )
603 throws SAXException {
604 ;
605 }
606
607 /**
608 * Overide of the error operation in the DefaultHandler
609 * @param error a <code>SAXParseException</code> value
610 * @exception SAXException is raised if the error exception could not be
611 * adequately handled
612 * @see org.xml.sax.helpers.DefaultHandler
613 */
614 public void error( SAXParseException error )
615 throws SAXException {
616 ;
617 }
618
619 /**
620 * Overide of the fatalError operation in the DefaultHandler
621 * @param fatal a <code>SAXParseException</code> value
622 * @exception SAXException is raised if the error exception could not be
623 * adequately handled
624 * @see org.xml.sax.helpers.DefaultHandler
625 */
626 public void fatalError( SAXParseException fatal )
627 throws SAXException {
628 ;
629 }
630
631 /**
632 * The handleStartElement operation is called by the SAXPArserBase instance
633 * in response to a SAX event that indicates that a start tag has been
634 * encounterd. The SAXParserBase instance will have constructed a
635 * SAXElement
636 * instance that encapsulates the start state of the element and pushed it
637 * onto the context stack prior to passing it to this operation. It should
638 * be remembered that any PCDATA content associated with a tag will not be
639 * available until the end tag has been reached. This operation should be
640 * implemented by a conrete parser instance.
641 * @param element the SAXElement instance that encapsulates the state of
642 * the current tag.
643 * @throws SAXException is raised if the element could not be handled.
644 */
645 protected abstract void handleStartElement( SAXElement element )
646 throws SAXException;
647 /**
648 * The handleEndElement operation is called by the SAXParserBase instance
649 * in response to a SAX event indicating that an end tag has been. The
650 * SAXParserBase instance will have recovered the appropriate SAXElement
651 * instance for this tag from its context stack before calling this
652 * operation. Then this operation is called all nested tags will have been
653 * processed and any PCDATA contained in the tag will be associated with
654 * the SAXElement instance for that tag. This operation should be
655 * implemented by a concrete instance of the parser.
656 * @param element the SAXElement instance that encapsulates the state of
657 * the current tag.
658 * @throws SAXException is raised if the element could not be handled.
659 */
660 protected abstract void handleEndElement( SAXElement element )
661 throws SAXException;
662 /**
663 * Utility function to manage defined attribute where the value is
664 * the empty string.
665 * @param attribute the attribute that the value belongs to.
666 * @param toCheck the string to check
667 * @return the toCheck string if it is not deemed empty.
668 * @throws EmptyAttributeException if the string was deemed empty.
669 */
670 public static String noEmptyValue( String attribute, String toCheck )
671 throws EmptyAttributeException {
672 String retval;
673
674 if ( toCheck != null && ( retval = toCheck.trim() ).length() > 0 ) {
675 return retval;
676 }
677
678 throw new EmptyAttributeException( attribute );
679 }
680 }