Source code: com/aendvari/common/osm/OsmReader.java
1 /*
2 * OsmReader.java
3 *
4 * Copyright (c) 2001, 2002 Aendvari, Ltd. All Rights Reserved.
5 *
6 * See the file LICENSE for terms of use.
7 *
8 */
9
10 package com.aendvari.common.osm;
11
12 import java.io.Reader;
13
14 import java.util.ArrayList;
15 import java.util.Iterator;
16 import java.util.StringTokenizer;
17
18 import com.aendvari.common.osm.*;
19 import com.aendvari.common.util.*;
20
21
22 /**
23 * <p>This class reads an "XML-like" file and converts it into an Osm tree.</p>
24 *
25 * <p>
26 * NOTE: The file uses a subset of the XML syntax. This class is <b>NOT</b> an
27 * XML parser, but instead the files used by this reader are able to be read
28 * by other XML parsers.
29 * </p>
30 *
31 * @author Scott Milne
32 *
33 */
34
35 public class OsmReader
36 {
37 /* Attributes */
38
39 /** The current line number being read. */
40 protected static int currentLine;
41
42 /** The line number of the last known "open" tag. */
43 protected static int lastOpenTagLine;
44
45 /** The name of the last known "open" tag. */
46 protected static String lastOpenTag;
47
48
49 /* Constants. */
50
51
52 /** Special characters that need to be converted into their text values */
53 protected static String specialCharacters[][] = {
54
55 { "&", "&" },
56 { "<", "<" },
57 { ">", ">" },
58 { """, "\"" },
59 { "'", "'" },
60
61 };
62
63 /** An interface for setting the current state of a read. */
64 protected interface State
65 {
66 public static final int Normal = 1;
67 public static final int Tag = 2;
68 public static final int Body = 3;
69 public static final int Ignore = 4;
70 }
71
72
73 /**
74 * Parses a buffered stream into the reader.
75 *
76 * @param bufferedReader A {@link Reader} instance.
77 *
78 * @throws Exception if parse fails.
79 *
80 */
81
82 public static Osm read( Reader bufferedReader )
83 throws Exception
84 {
85 return executeRead(bufferedReader);
86 }
87
88 /**
89 * Initializes the OSM with the data from the reader.
90 *
91 * @param bufferedReader A {@link Reader} instance.
92 *
93 */
94
95 private static Osm executeRead( Reader bufferedReader )
96 throws Exception
97 {
98 Osm rootOsm = new Osm();
99
100 readBuffer( bufferedReader, rootOsm, State.Normal );
101
102 return rootOsm;
103 }
104
105 /**
106 * Reads the buffer character by character matching with < and > to determine
107 * tags, and their contents.
108 *
109 * @param reader A {@link Reader} instance.
110 * @param node A {@link OsmNode} instance.
111 * @param readState The state in which the read is currently in.
112 *
113 */
114
115 private static void readBuffer( Reader reader, OsmNode node, int readState )
116 throws Exception
117 {
118 int read;
119 char ch;
120 String data = "";
121
122 // as long as we're not at the end of the buffer
123 while ((read = reader.read()) != -1)
124 {
125 ch = (char)read;
126
127 //
128 // adjust the current line if possible
129 //
130
131 // \r\n
132 if (ch == '\r' && peekMatch(reader, "\n"))
133 {
134 data += ch;
135 currentLine++;
136 }
137 // \r only
138 else if (ch == '\r' && !peekMatch(reader, "\n"))
139 {
140 data += ch;
141 currentLine++;
142 }
143 // \n only
144 else if (ch == '\n' && !data.endsWith("\r") )
145 {
146 data += ch;
147 currentLine++;
148 }
149
150
151 //
152 // collect each <tag> their body, and their children
153 //
154
155 // if a < is found we're opening a tag
156 if (ch == '<')
157 {
158 // check to see if this is a comment
159 if (peekMatch(reader, "!--") || peekMatch(reader, "?"))
160 {
161 // continue reading, but set the state to ignore
162 readBuffer(reader,node,State.Ignore);
163 }
164 // as long as the current read state is not ignore
165 else if (readState != State.Ignore)
166 {
167 // if we were working on a body, then this is the start of the end tag for that body
168 if (readState == State.Body)
169 {
170 // get the current data from the node
171 String value = (String)node.getNodeValue();
172
173 // convert the special characters of the body
174 // and add it to the previous node data
175 value += convertSpecialCharacters(data);
176
177 // set the new body data into the node
178 node.setNodeValue(value);
179
180 // reset the data string for the next tag
181 data = "";
182 }
183
184 // add this character to the data string in case were also
185 // building another tag
186 data += ch;
187 }
188 // we're currently ignoring this data, so collect it up without parsing it
189 else
190 {
191 data += ch;
192 }
193 }
194 // if this character is found, we're closing a tag
195 else if (ch == '>')
196 {
197 data += ch;
198
199 // check to see if the characters before this were within a comment
200 // if so, break out of this loop so that the ignore state will be reset
201 if ( data.endsWith("-->") || data.endsWith("?>") )
202 {
203 break;
204 }
205 // if the read state is not currently ignore
206 else if (readState != State.Ignore)
207 {
208 // check to see if this is a closing tag
209 // if so, then we are ending a body
210 if (data.startsWith("</"))
211 {
212 // get the name of the node from the tag
213 String nodeName = nodeNameFromTag(data);
214
215 // if this tag is the same as the parent of the current node
216 // then we don't have a correct closing tag. Throw and error.
217 if (nodeName.equals(node.getParentNode().getNodeName()))
218 {
219 throw new Exception("Opening tag \""+lastOpenTag+"\" does not have a closing tag. Line "+lastOpenTagLine+".");
220 }
221
222 break;
223 }
224 // otherwise we are starting a body
225 else
226 {
227 // create the new node
228 OsmNode newNode = nodeFromTag(node.getOwnerOsm(), data);
229 node.appendChild(newNode);
230
231 // if this tag has a body, get it
232 if (!data.endsWith("/>"))
233 {
234 String nodeName = nodeNameFromTag(data);
235 lastOpenTag = nodeName;
236 lastOpenTagLine = currentLine;
237
238 // continue reading, but set the state flag to body
239 readBuffer(reader,newNode,State.Body);
240 }
241
242 // now extract the attributes from the tag
243 extractTagAttributes( newNode, data );
244
245 // reset the data string for the next tag
246 data = "";
247 }
248 }
249 }
250 // collect every other character (usually body or ignored content)
251 else
252 {
253 data += ch;
254 }
255 }
256 }
257
258 /**
259 * Extracts the name of a node from the tag.
260 *
261 * @param tag The <tag> to extract the name from.
262 *
263 * @return The value within the tag representing the node name.
264 *
265 */
266
267 private static String nodeNameFromTag( String tag )
268 {
269 String trimTag = tag.trim();
270
271 // we only want the text up to the first space
272 // ie: <mytag attr="something">
273 // we only want "<mytag"
274 int endIndex = trimTag.indexOf(" ");
275 if (endIndex == -1)
276 {
277 // if there was a "/>" adjust the end index to before it
278 endIndex = trimTag.indexOf("/>");
279 if (endIndex == -1)
280 {
281 // othewise set the end index to the last character ">"
282 endIndex = trimTag.length() - 1;
283 }
284 }
285
286 // determine where to start
287 int startIndex = 1;
288 if (trimTag.startsWith("</"))
289 {
290 startIndex = 2;
291 }
292
293 String nodeName = trimTag.substring(startIndex,endIndex);
294
295 return nodeName;
296 }
297
298 /**
299 * Create a new OsmNode from the given <tag>.
300 *
301 * @param node The {@link Osm} to create a new {@link OsmNode} from.
302 * @param tag The <tag> to create an {@link OsmNode} from.
303 *
304 * @return The new {@link OsmNode}.
305 *
306 */
307
308 private static OsmNode nodeFromTag( Osm osm, String tag )
309 {
310 String nodeName = nodeNameFromTag(tag);
311 OsmNode newNode = osm.createNode(nodeName, "");
312 return newNode;
313 }
314
315 /**
316 * Extract the attributes from a <tag>.
317 *
318 * @param node The {@link OsmNode} to place the attributes into.
319 * @param tag The <tag> to extract the attributes from.
320 *
321 */
322
323 private static void extractTagAttributes( OsmNode node, String tag )
324 throws Exception
325 {
326 // strip off the < and /> from the tag
327 tag = tag.replace('<',' ');
328 tag = tag.replace('>',' ');
329 tag = tag.trim();
330
331 // now that the brackets are gone, remove the / trailing or leading
332
333 // ie: </tag> --> /tag
334 if (tag.startsWith("/"))
335 {
336 tag = tag.substring(1);
337 }
338
339 // ie: <tag/> --> tag/
340 if (tag.endsWith("/"))
341 {
342 tag = tag.substring(0, tag.length()-1);
343 }
344
345 // trim all whitespace again
346 tag = tag.trim();
347
348 // parse path
349 StringTokenizer tokens = new StringTokenizer(tag, " ", false);
350
351 while (tokens.hasMoreTokens())
352 {
353 // get attribute
354 String attribute = tokens.nextToken();
355
356 if (attribute.equals(node.getNodeName()))
357 {
358 continue;
359 }
360
361 // get the attribute name
362 int index;
363
364 index = attribute.indexOf("=\"");
365 if (index != -1)
366 {
367 String attrName = attribute.substring(0, index);
368
369 // get the attribute value
370 int oldIndex = index + 2;
371 index = attribute.indexOf("\"", oldIndex);
372 if (index != -1)
373 {
374 String attrValue = attribute.substring(oldIndex, index);
375
376 // set the attribute into the node
377 node.setAttribute( attrName, attrValue );
378 }
379 else
380 {
381 throw new Exception("Invalid attribute definition 1. Line "+currentLine+".");
382 }
383 }
384 else
385 {
386 throw new Exception("Invalid attribute definition 2. Line "+currentLine+".");
387 }
388 }
389 }
390
391 /**
392 * Peeks into the read stream to see if the given string matches the same number of characters
393 * in the upcoming stream.
394 *
395 * @param reader The <code>Reader</code> stream to peek into.
396 * @param match The string to attempt to match with.
397 *
398 * @return True if the string matches.
399 *
400 */
401
402 private static boolean peekMatch( Reader reader, String match )
403 throws Exception
404 {
405 String value = "";
406 int len = match.length();
407
408 char ch;
409 int index = 0;
410
411 // mark the buffer so we can return to the correct location
412 reader.mark(len+1);
413
414 while ((ch=(char)reader.read())!=-1 && index<len)
415 {
416 // if the end of the file was found
417 if (ch == -1)
418 {
419 return false;
420 }
421
422 value += ch;
423 index++;
424 }
425
426 // reset back to original location
427 reader.reset();
428
429 return value.equals(match);
430 }
431
432 /**
433 * Converts the special characters of a string into their secondary form.
434 *
435 * @param data The string to convert special characters of.
436 *
437 * @return Alternative form of string.
438 *
439 */
440
441 private static String convertSpecialCharacters( String data )
442 {
443 int i;
444
445 for (i=0; i<specialCharacters.length; i++)
446 {
447 data = replaceAll(data, specialCharacters[i][0], specialCharacters[i][1]);
448 }
449
450 return data;
451 }
452
453 /**
454 * Replaces all the matching values of a string with their replacement value.
455 *
456 * @param source The string to replace the values of.
457 * @param original The original value to search for.
458 * @param replace The string to replace the original value with.
459 *
460 * @return Updated string.
461 *
462 */
463
464 private static String replaceAll( String source, String original, String replace )
465 {
466 int index;
467
468 while ((index=source.indexOf(original)) != -1)
469 {
470 String start = source.substring(0, index);
471 String end = source.substring(index+original.length(), source.length());
472
473 source = start + replace + end;
474 }
475
476 return source;
477 }
478 }
479