Source code: org/media/mn8/protocol/jabber/xmlparser/XMLParser.java
1 /*
2 * $COPYRIGHT$
3 * $Id: XMLParser.java,v 1.2 2002/07/24 23:24:59 neuro Exp $
4 *
5 * Date Author Changes
6 * APR 08 2002 Szabo Csaba Created
7 */
8 package org.media.mn8.protocol.jabber.xmlparser;
9
10 import java.io.*;
11 import java.util.*;
12
13 /**
14 * The main XML Parser class.
15 */
16 public class XMLParser {
17 /**
18 * The reader from which the stream is being read
19 */
20 private Reader inputReader;
21
22 /**
23 * The handler for XML Events.
24 */
25 private XMLEventListener eventHandler;
26
27 /**
28 * The root tag for the document.
29 */
30 private String rootTag = null;
31
32 /**
33 * Flag to say whether or not this stream is UTF-8 encoded.
34 */
35 private boolean isUTF8Encoded;
36
37 /**
38 * The buffer for incomming data.
39 */
40 private StringBuffer dataBuffer;
41
42 /**
43 * The input stream being read.
44 */
45 private InputStream is;
46
47
48 /**
49 * Constructor, Used to override default dispatcher.
50 *
51 * @param _eventHandler The event handle to dispatch events through.
52 */
53 public XMLParser( XMLEventListener _eventHandler ) {
54 eventHandler = _eventHandler;
55 dataBuffer = new StringBuffer();
56 }
57
58
59 /**
60 * Method to determine if a character is a whitespace.
61 *
62 * @param c The character to check.
63 * @return true if the character is a whitespace, false if not.
64 */
65 private boolean isWhitespace( char c ) {
66 if( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) return true;
67 return false;
68 }
69
70
71 /**
72 * Method to set the flag to state whether or not the input is UTF-8
73 * encoded. For the UTF-8 decoding to work the parse method MUST be
74 * called by passing it a java.io.DataInputStream object.
75 *
76 * @param flag True if UTF-8 decoding should be performed on the input
77 * stream, false if not.
78 */
79 public void setInputUTF8Encoded( boolean flag ) {
80 isUTF8Encoded = flag;
81 }
82
83
84 /**
85 * Method to get the next character from the input stream.
86 */
87 public int getNextCharacter() throws IOException {
88 int actualValue = -1;
89 int inputValue = inputReader.read();
90 if( inputValue == -1 ) return -1;
91
92 if( isUTF8Encoded == false ) {
93 actualValue = inputValue;
94 }
95 else {
96 inputValue &= 0xff;
97 if ( (inputValue & 0x80) == 0 ) {
98 actualValue = inputValue;
99 }
100 else if ( (inputValue & 0xF8) == 0xF0 ) {
101 actualValue = (inputValue & 0x1f)<<6;
102
103 int nextByte = inputReader.read() & 0xff;
104 if( (nextByte & 0xC0) != 0x80 )
105 throw new IOException( "Invalid UTF-8 format" );
106 actualValue += (nextByte & 0x3F )<<6;
107
108 nextByte = inputReader.read() & 0xff;
109 if( (nextByte & 0xC0) != 0x80 )
110 throw new IOException( "Invalid UTF-8 format" );
111 actualValue += (nextByte & 0x3F )<<6;
112
113 nextByte = inputReader.read() & 0xff;
114 if( (nextByte & 0xC0) != 0x80 )
115 throw new IOException( "Invalid UTF-8 format" );
116 actualValue += (nextByte & 0x3F );
117 }
118 else if ( (inputValue & 0xF0) == 0xE0 ) {
119 actualValue = (inputValue & 0x1f)<<6;
120
121 int nextByte = inputReader.read() & 0xff;
122 if( (nextByte & 0xC0) != 0x80 )
123 throw new IOException( "Invalid UTF-8 format" );
124 actualValue += (nextByte & 0x3F )<<6;
125
126 nextByte = inputReader.read() & 0xff;
127 if( (nextByte & 0xC0) != 0x80 )
128 throw new IOException( "Invalid UTF-8 format" );
129 actualValue += (nextByte & 0x3F );
130 }
131 else if ( (inputValue & 0xE0) == 0xC0 ) {
132 actualValue = (inputValue & 0x1f)<<6;
133
134 int nextByte = inputReader.read() & 0xff;
135 if( (nextByte & 0xC0) != 0x80 )
136 throw new IOException( "Invalid UTF-8 format" );
137 actualValue += (nextByte & 0x3F );
138 }
139 }
140
141 return actualValue;
142 }
143
144
145 /**
146 * Method to read until an end condition.
147 *
148 * @param checker The class used to check if the end condition has occurred.
149 * @return A string representation of the data read.
150 */
151 private String readUntilEnd( ReadEndChecker checker ) throws IOException, EndOfXMLException {
152 StringBuffer data = new StringBuffer();
153 int nextChar = getNextCharacter();
154
155 if( nextChar == -1 ) throw new EndOfXMLException();
156 while( nextChar != -1 && checker.shouldStop( nextChar ) == false ) {
157 data.append( (char) nextChar );
158 nextChar = getNextCharacter();
159 }
160 if( nextChar != '<' && nextChar != '>') data.append( (char) nextChar );
161
162 String returnData = data.toString();
163 return returnData;
164 }
165
166
167 /**
168 * Method to handle the attributes in a tag
169 *
170 * @param data The section of the tag holding the attribute details
171 */
172 private Hashtable handleAttributes( String data ) {
173 Hashtable attributes = new Hashtable();
174
175 int length = data.length();
176 int i = 0;
177 while( i < length ) {
178 StringBuffer nameBuffer = new StringBuffer();
179 char thisChar = data.charAt(i);
180 while( isWhitespace( thisChar ) && i < length ) {
181 i++;
182 thisChar = data.charAt(i);
183 }
184 if( thisChar == '>' || i == length ) break;
185
186 while( thisChar != '=' && i < length ) {
187 nameBuffer.append(thisChar);
188 i++;
189 thisChar = data.charAt(i);
190 }
191 if( i == length ) break;
192
193 String name = nameBuffer.toString();
194 // See if first character is a character
195 i++;
196 thisChar = data.charAt(i);
197 while( isWhitespace( thisChar ) && i < length ) {
198 i++;
199 thisChar = data.charAt(i);
200 }
201
202 int breakOn = 0;
203 if( thisChar == '\"' ) {
204 breakOn = 1;
205 }
206 else if (thisChar =='\'' ) {
207 breakOn = 2;
208 }
209
210 // Set up buffer for value parameter
211 StringBuffer valueBuffer = new StringBuffer();
212 if( breakOn == 0 ) {
213 valueBuffer.append( thisChar );
214 }
215
216 i++;
217 while( i < length ) {
218 thisChar = data.charAt(i);
219 i++;
220 if ( breakOn == 0 && isWhitespace( thisChar ) ) {
221 break;
222 }
223 else if ( breakOn == 1 && thisChar == '\"' ) {
224 break;
225 }
226 else if ( breakOn == 2 && thisChar == '\'' ) {
227 break;
228 }
229 valueBuffer.append( thisChar );
230 }
231 String value = valueBuffer.toString();
232 attributes.put( name, value );
233 }
234
235 return attributes;
236 }
237
238 /**
239 * Method to handle the reading and dispatch of tag data.
240 */
241 private void handleTag() throws IOException, EndOfXMLException {
242 boolean startTag = true, emptyTag = false, hasMoreData = true;
243 String tagName = null;
244 Hashtable attributes = null;
245 String data = readUntilEnd ( inTagReadEndChecker );
246
247 if( data.startsWith( "?") ) return;
248 int substringStart = 0, substringEnd = data.length();
249
250 if( data.startsWith( "/" ) ) {
251 startTag = false;
252 substringStart++;
253 }
254
255 if( data.endsWith( "/" ) ) {
256 emptyTag = true;
257 substringEnd--;
258 }
259
260 data = data.substring( substringStart, substringEnd );
261 int spaceIdx = 0;
262 while( spaceIdx < data.length() && isWhitespace( data.charAt(spaceIdx) ) == false ) spaceIdx++;
263
264 tagName = data.substring(0,spaceIdx).toLowerCase();
265
266 if( spaceIdx != data.length() ) {
267 data = data.substring( spaceIdx+1 );
268 attributes = handleAttributes( data );
269 }
270 tagName = tagName.toLowerCase();
271
272 if( startTag ) {
273 if( rootTag == null ) rootTag = tagName;
274 eventHandler.tagStarted( tagName, attributes);
275 }
276
277 if( emptyTag || !startTag ) {
278 eventHandler.tagEnded( tagName );
279 if( rootTag != null && tagName.equals( rootTag ) ) throw new EndOfXMLException();
280 }
281 }
282
283
284 /**
285 * Method to handle the reading in and dispatching of events for plain text.
286 */
287 private void handlePlainText() throws IOException, EndOfXMLException {
288 String data = readUntilEnd ( inPlaintextReadEndChecker );
289 eventHandler.plaintextEncountered( data );
290 }
291
292
293 /**
294 * Parse wrapper for InputStreams
295 *
296 * @param _inputReader The reader for the XML stream.
297 */
298 public void parse ( InputStream _is ) throws IOException {
299 is = _is;
300 InputStreamReader isr = new InputStreamReader( is );
301 parse( isr );
302 }
303
304
305 /**
306 * The main parsing loop.
307 *
308 * @param _inputReader The reader for the XML stream.
309 */
310 public void parse ( Reader _inputReader ) throws IOException {
311 inputReader = _inputReader;
312 try {
313 while( true ) {
314 handlePlainText();
315 handleTag();
316 }
317 }
318 catch( EndOfXMLException x ) {
319 // The EndOfXMLException is purely used to drop out of the
320 // continuous loop.
321 }
322 }
323
324 /*
325 ------------------------------------------------------
326 Classes for handling the control of the reading stream
327 ------------------------------------------------------
328 */
329
330 /**
331 * Class to indicate the end of reading a plain text section
332 */
333 class InPlaintextReadEndChecker implements ReadEndChecker {
334
335 /**
336 * The method to issue a stop message when a start tag symbol (>) is encountered .
337 *
338 * @param c The character to check
339 * @return true if it is the symbol, false otehrwise.
340 */
341 public boolean shouldStop( int c ) {
342 return (c == '<');
343 }
344 }
345
346
347 /**
348 * Shared instance of the plain text end checker.
349 */
350 private final InPlaintextReadEndChecker inPlaintextReadEndChecker = new InPlaintextReadEndChecker();
351
352
353 /**
354 * Class to indicate the end of reading a tag section
355 */
356 class InTagReadEndChecker implements ReadEndChecker {
357
358 /**
359 * The method to issue a stop message when either a space of close tag symbol (<) is encountered .
360 *
361 * @param c The character to check.
362 * @return true if c is either symbol, false otehrwise.
363 */
364 public boolean shouldStop( int c ) {
365 return (c == '>');
366 }
367 }
368
369
370 /**
371 * Shared instance of the tag end checker.
372 */
373 private final InTagReadEndChecker inTagReadEndChecker = new InTagReadEndChecker();
374 }