Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/media/mn8/protocol/jabber/xmlparser/XMLParser.java


1   /* 
2    * $COPYRIGHT$
3    * $Id: XMLParser.java,v 1.2 2002/07/24 23:24:59 neuro Exp $
4    *
5    * Date        Author            Changes 
6    * APR 08 2002 Szabo Csaba       Created
7    */
8   package org.media.mn8.protocol.jabber.xmlparser;
9   
10  import java.io.*;
11  import java.util.*;
12  
13  /**
14   * The main XML Parser class.
15   */
16  public class XMLParser {
17      /**
18       * The reader from which the stream is being read
19       */    
20      private Reader inputReader;
21      
22      /**
23       * The handler for XML Events.
24       */
25      private XMLEventListener eventHandler;
26      
27      /**
28       * The root tag for the document.
29       */
30      private String rootTag = null;
31  
32      /**
33       * Flag to say whether or not this stream is UTF-8 encoded.
34       */
35      private boolean isUTF8Encoded;
36  
37      /**
38       * The buffer for incomming data.
39       */
40      private StringBuffer dataBuffer;
41     
42      /**
43       * The input stream being read.
44       */
45      private InputStream is;
46  
47  
48      /**
49       * Constructor, Used to override default dispatcher.
50       *
51       * @param _eventHandler The event handle to dispatch events through.
52       */
53      public XMLParser( XMLEventListener _eventHandler ) {
54          eventHandler = _eventHandler;
55          dataBuffer = new StringBuffer();
56      }
57  
58  
59      /**
60       * Method to determine if a character is a whitespace.
61       *
62       * @param c The character to check.
63       * @return true if the character is a whitespace, false if not.
64       */
65      private boolean isWhitespace( char c ) {
66          if( c == ' ' ||  c == '\t' ||  c == '\r' ||  c == '\n' ) return true;
67          return false;
68      }
69  
70  
71      /**
72       * Method to set the flag to state whether or not the input is UTF-8
73       * encoded. For the UTF-8 decoding to work the parse method MUST be
74       * called by passing it a java.io.DataInputStream object.
75       *
76       * @param flag True if UTF-8 decoding should be performed on the input
77       *  stream, false if not.
78       */
79      public void setInputUTF8Encoded( boolean flag ) {
80          isUTF8Encoded = flag;
81      }
82      
83  
84      /**
85       * Method to get the next character from the input stream.
86       */
87      public int getNextCharacter() throws IOException {
88          int actualValue = -1;
89          int inputValue = inputReader.read();
90          if( inputValue == -1 ) return -1;
91  
92          if( isUTF8Encoded == false ) {
93              actualValue = inputValue;
94          }
95          else {
96              inputValue &= 0xff;
97              if ( (inputValue & 0x80) == 0 ) {
98                  actualValue = inputValue;
99              }
100             else if ( (inputValue & 0xF8) == 0xF0 ) {
101                 actualValue = (inputValue & 0x1f)<<6;
102 
103                 int nextByte = inputReader.read() & 0xff;
104                 if( (nextByte & 0xC0) != 0x80 )
105                     throw new IOException( "Invalid UTF-8 format" );
106                 actualValue += (nextByte & 0x3F )<<6;
107                 
108                 nextByte = inputReader.read() & 0xff;
109                 if( (nextByte & 0xC0) != 0x80 )
110                     throw new IOException( "Invalid UTF-8 format" );
111                 actualValue += (nextByte & 0x3F )<<6;
112 
113                 nextByte = inputReader.read() & 0xff;
114                 if( (nextByte & 0xC0) != 0x80 )
115                     throw new IOException( "Invalid UTF-8 format" );
116                 actualValue += (nextByte & 0x3F );
117             }
118             else if ( (inputValue & 0xF0) == 0xE0 ) {
119                 actualValue = (inputValue & 0x1f)<<6;
120 
121                 int nextByte = inputReader.read() & 0xff;
122                 if( (nextByte & 0xC0) != 0x80 )
123                     throw new IOException( "Invalid UTF-8 format" );
124                 actualValue += (nextByte & 0x3F )<<6;
125 
126                 nextByte = inputReader.read() & 0xff;
127                 if( (nextByte & 0xC0) != 0x80 )
128                     throw new IOException( "Invalid UTF-8 format" );
129                 actualValue += (nextByte & 0x3F );
130             }
131             else if ( (inputValue & 0xE0) == 0xC0 ) {
132                 actualValue = (inputValue & 0x1f)<<6;
133 
134                 int nextByte = inputReader.read() & 0xff;
135                 if( (nextByte & 0xC0) != 0x80 )
136                     throw new IOException( "Invalid UTF-8 format" );
137                 actualValue += (nextByte & 0x3F );
138             }
139         }
140         
141         return actualValue;
142     }
143 
144 
145     /**
146      * Method to read until an end condition.
147      *
148      * @param checker The class used to check if the end condition has occurred.
149      * @return A string representation of the data read.
150      */
151     private String readUntilEnd( ReadEndChecker checker ) throws IOException, EndOfXMLException {
152         StringBuffer data = new StringBuffer();
153         int nextChar = getNextCharacter();
154         
155         if( nextChar == -1 ) throw new EndOfXMLException();
156         while( nextChar != -1 && checker.shouldStop( nextChar ) == false ) {
157             data.append( (char) nextChar );
158             nextChar = getNextCharacter();
159         }
160         if( nextChar != '<' && nextChar != '>') data.append( (char) nextChar );
161         
162         String returnData = data.toString();
163         return returnData;
164     }
165     
166 
167     /**
168      * Method to handle the attributes in a tag
169      *
170      * @param data The section of the tag holding the attribute details
171      */
172     private Hashtable handleAttributes( String data ) {
173         Hashtable attributes = new Hashtable();
174 
175         int length = data.length();
176         int i = 0;
177         while( i < length ) {
178             StringBuffer nameBuffer = new StringBuffer();
179             char thisChar = data.charAt(i);
180             while( isWhitespace( thisChar ) && i < length ) {
181                 i++;
182                 thisChar = data.charAt(i);
183             }
184             if( thisChar == '>' || i == length ) break;
185             
186             while( thisChar != '=' && i < length ) {
187                 nameBuffer.append(thisChar);
188                 i++;
189                 thisChar = data.charAt(i);
190             }
191             if( i == length ) break;
192 
193             String name = nameBuffer.toString();
194             // See if first character is a character
195             i++;
196             thisChar = data.charAt(i);
197             while( isWhitespace( thisChar ) && i < length ) {
198                 i++;
199                 thisChar = data.charAt(i);
200             }
201 
202             int breakOn = 0;
203             if( thisChar == '\"' ) {
204                 breakOn = 1;
205             }
206             else if (thisChar =='\'' ) {
207                 breakOn = 2;
208             }
209 
210             // Set up buffer for value parameter
211             StringBuffer valueBuffer = new StringBuffer();
212             if( breakOn == 0 ) {
213                 valueBuffer.append( thisChar );
214             }
215 
216             i++;
217             while( i < length ) {
218                 thisChar = data.charAt(i);
219                 i++;
220                 if ( breakOn == 0 && isWhitespace( thisChar ) ) {
221                     break;
222                 }
223                 else if ( breakOn == 1 && thisChar == '\"' ) {
224                     break;
225                 }
226                 else if ( breakOn == 2 && thisChar == '\'' ) {
227                     break;
228                 }
229                 valueBuffer.append( thisChar );
230             }
231             String value = valueBuffer.toString();
232             attributes.put( name, value );
233         }
234         
235         return attributes;
236     }
237 
238     /**
239      * Method to handle the reading and dispatch of tag data.
240      */
241     private void handleTag() throws IOException, EndOfXMLException {
242         boolean startTag = true, emptyTag = false, hasMoreData = true;
243         String  tagName = null;
244         Hashtable attributes = null;
245         String data = readUntilEnd ( inTagReadEndChecker );
246         
247         if( data.startsWith( "?") ) return;
248         int substringStart = 0, substringEnd = data.length();
249 
250         if( data.startsWith( "/" )  ) {
251             startTag = false;
252             substringStart++;
253         }
254 
255         if( data.endsWith( "/" ) ) {
256             emptyTag = true;
257             substringEnd--;
258         }
259 
260         data = data.substring( substringStart, substringEnd );
261         int spaceIdx = 0;
262         while( spaceIdx < data.length() && isWhitespace( data.charAt(spaceIdx) ) == false ) spaceIdx++;
263         
264         tagName = data.substring(0,spaceIdx).toLowerCase();
265         
266         if( spaceIdx != data.length() ) {
267             data = data.substring( spaceIdx+1 );
268             attributes = handleAttributes( data );
269         }
270         tagName = tagName.toLowerCase();
271         
272         if( startTag ) {
273             if( rootTag == null ) rootTag = tagName;
274             eventHandler.tagStarted( tagName, attributes);
275         }
276 
277         if( emptyTag || !startTag ) {
278             eventHandler.tagEnded( tagName );
279             if( rootTag != null && tagName.equals( rootTag ) ) throw new EndOfXMLException();
280         }
281     }
282 
283 
284     /**
285      * Method to handle the reading in and dispatching of events for plain text.
286      */
287     private void handlePlainText() throws IOException, EndOfXMLException {
288         String data = readUntilEnd ( inPlaintextReadEndChecker );
289         eventHandler.plaintextEncountered( data );
290     }
291 
292 
293     /**
294      * Parse wrapper for InputStreams
295      *
296      * @param _inputReader The reader for the XML stream.
297      */
298     public void  parse ( InputStream _is ) throws IOException {
299         is = _is;
300         InputStreamReader isr = new InputStreamReader( is );
301         parse( isr );
302     }
303 
304 
305     /**
306      * The main parsing loop.
307      *
308      * @param _inputReader The reader for the XML stream.
309      */
310     public void  parse ( Reader _inputReader ) throws IOException {
311         inputReader = _inputReader;
312         try {
313             while( true ) {
314                 handlePlainText();
315                 handleTag();
316             }
317         }
318         catch( EndOfXMLException x ) {
319             // The EndOfXMLException is purely used to drop out of the
320             // continuous loop.
321         }
322     }
323 
324 /*
325 ------------------------------------------------------
326 Classes for handling the control of the reading stream
327 ------------------------------------------------------
328 */
329 
330     /**
331      * Class to indicate the end of reading a plain text section
332      */
333     class InPlaintextReadEndChecker implements ReadEndChecker {
334 
335         /**
336          * The method to issue a stop message when a start tag symbol (&gt;) is encountered .
337          *
338          * @param c The character to check
339          * @return true if it is the symbol, false otehrwise.
340          */
341         public boolean shouldStop( int c ) {
342             return (c == '<');
343         }
344     }
345 
346 
347     /**
348      * Shared instance of the plain text end checker.
349      */
350     private final InPlaintextReadEndChecker inPlaintextReadEndChecker = new InPlaintextReadEndChecker();
351 
352     
353     /**
354      * Class to indicate the end of reading a tag section
355      */
356     class InTagReadEndChecker implements ReadEndChecker {
357     
358         /**
359          * The method to issue a stop message when either a space of close tag symbol (&lt;) is encountered .
360          *
361          * @param c The character to check.
362          * @return true if c is either symbol, false otehrwise.
363          */
364         public boolean shouldStop( int c ) {
365             return (c == '>');
366         }
367     }
368 
369 
370     /**
371      * Shared instance of the tag end checker.
372      */
373     private final InTagReadEndChecker inTagReadEndChecker = new InTagReadEndChecker();
374 }