Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/aendvari/common/osm/OsmReader.java


1   /*
2    * OsmReader.java
3    *
4    * Copyright (c) 2001, 2002 Aendvari, Ltd. All Rights Reserved.
5    *
6    * See the file LICENSE for terms of use.
7    *
8    */
9   
10  package com.aendvari.common.osm;
11  
12  import java.io.Reader;
13  
14  import java.util.ArrayList;
15  import java.util.Iterator;
16  import java.util.StringTokenizer;
17  
18  import com.aendvari.common.osm.*;
19  import com.aendvari.common.util.*;
20  
21  
22  /**
23   * <p>This class reads an "XML-like" file and converts it into an Osm tree.</p>
24   *
25   * <p>
26   * NOTE: The file uses a subset of the XML syntax. This class is <b>NOT</b> an
27   * XML parser, but instead the files used by this reader are able to be read
28   * by other XML parsers.
29   * </p>
30   *
31   * @author  Scott Milne
32   *
33   */
34  
35  public class OsmReader
36  {
37    /* Attributes */
38  
39    /** The current line number being read. */
40    protected static int currentLine;
41  
42    /** The line number of the last known "open" tag. */
43    protected static int lastOpenTagLine;
44  
45    /** The name of the last known "open" tag. */
46    protected static String lastOpenTag;
47  
48  
49    /* Constants. */
50  
51  
52    /** Special characters that need to be converted into their text values */
53    protected static String specialCharacters[][] = {
54  
55      { "&amp;", "&" },
56      { "&lt;", "<" },
57      { "&gt;", ">" },
58      { "&quot;", "\"" },
59      { "&apos;", "'" },
60  
61    };
62  
63    /** An interface for setting the current state of a read. */
64    protected interface State
65    {
66      public static final int Normal  = 1;
67      public static final int Tag    = 2;
68      public static final int Body  = 3;
69      public static final int Ignore  = 4;
70    }
71  
72  
73    /**
74     * Parses a buffered stream into the reader.
75     *
76     * @param    bufferedReader        A {@link Reader} instance.
77     *
78     * @throws                  Exception if parse fails.
79     *
80     */
81  
82    public static Osm read( Reader bufferedReader )
83      throws Exception
84    {
85      return executeRead(bufferedReader);
86    }
87  
88    /**
89     * Initializes the OSM with the data from the reader.
90     *
91     * @param    bufferedReader        A {@link Reader} instance.
92     *
93     */
94  
95    private static Osm executeRead( Reader bufferedReader )
96      throws Exception
97    {
98      Osm rootOsm = new Osm();
99  
100     readBuffer( bufferedReader, rootOsm, State.Normal );
101 
102     return rootOsm;
103   }
104 
105   /**
106    * Reads the buffer character by character matching with < and > to determine
107    * tags, and their contents.
108    *
109    * @param    reader          A {@link Reader} instance.
110    * @param    node          A {@link OsmNode} instance.
111    * @param    readState        The state in which the read is currently in.
112    *
113    */
114 
115   private static void readBuffer( Reader reader, OsmNode node, int readState )
116     throws Exception
117   {
118     int read;
119     char ch;
120     String data = "";
121 
122     // as long as we're not at the end of the buffer
123     while ((read = reader.read()) != -1)
124     {
125       ch = (char)read;
126 
127       //
128       // adjust the current line if possible
129       //
130 
131       // \r\n
132       if (ch == '\r' && peekMatch(reader, "\n"))
133       {
134         data += ch;
135         currentLine++;
136       }
137       // \r only
138       else if (ch == '\r' && !peekMatch(reader, "\n"))
139       {
140         data += ch;
141         currentLine++;
142       }
143       // \n only
144       else if (ch == '\n' && !data.endsWith("\r") )
145       {
146         data += ch;
147         currentLine++;
148       }
149 
150 
151       //
152       // collect each <tag> their body, and their children
153       //
154 
155       // if a < is found we're opening a tag
156       if (ch == '<')
157       {
158         // check to see if this is a comment
159         if (peekMatch(reader, "!--") || peekMatch(reader, "?"))
160         {
161           // continue reading, but set the state to ignore
162           readBuffer(reader,node,State.Ignore);
163         }
164         // as long as the current read state is not ignore
165         else if (readState != State.Ignore)
166         {
167           // if we were working on a body, then this is the start of the end tag for that body
168           if (readState == State.Body)
169           {
170             // get the current data from the node
171             String value = (String)node.getNodeValue();
172 
173             // convert the special characters of the body
174             // and add it to the previous node data
175             value += convertSpecialCharacters(data);
176 
177             // set the new body data into the node
178             node.setNodeValue(value);
179 
180             // reset the data string for the next tag
181             data = "";
182           }
183 
184           // add this character to the data string in case were also
185           // building another tag
186           data += ch;
187         }
188         // we're currently ignoring this data, so collect it up without parsing it
189         else
190         {
191           data += ch;
192         }
193       }
194       // if this character is found, we're closing a tag
195       else if (ch == '>')
196       {
197         data += ch;
198 
199         // check to see if the characters before this were within a comment
200         // if so, break out of this loop so that the ignore state will be reset
201         if ( data.endsWith("-->") || data.endsWith("?>") )
202         {
203           break;
204         }
205         // if the read state is not currently ignore
206         else if (readState != State.Ignore)
207         {
208           // check to see if this is a closing tag
209           // if so, then we are ending a body
210           if (data.startsWith("</"))
211           {
212             // get the name of the node from the tag
213             String nodeName = nodeNameFromTag(data);
214 
215             // if this tag is the same as the parent of the current node
216             // then we don't have a correct closing tag. Throw and error.
217             if (nodeName.equals(node.getParentNode().getNodeName()))
218             {
219               throw new Exception("Opening tag \""+lastOpenTag+"\" does not have a closing tag. Line "+lastOpenTagLine+".");
220             }
221 
222             break;
223           }
224           // otherwise we are starting a body
225           else
226           {
227             // create the new node
228             OsmNode newNode = nodeFromTag(node.getOwnerOsm(), data);
229             node.appendChild(newNode);
230 
231             // if this tag has a body, get it
232             if (!data.endsWith("/>"))
233             {
234               String nodeName = nodeNameFromTag(data);
235               lastOpenTag = nodeName;
236               lastOpenTagLine = currentLine;
237 
238               // continue reading, but set the state flag to body
239               readBuffer(reader,newNode,State.Body);
240             }
241 
242             // now extract the attributes from the tag
243             extractTagAttributes( newNode, data );
244 
245             // reset the data string for the next tag
246             data = "";
247           }
248         }
249       }
250       // collect every other character (usually body or ignored content)
251       else
252       {
253         data += ch;
254       }
255     }
256   }
257 
258   /**
259    * Extracts the name of a node from the tag.
260    *
261    * @param    tag            The &lt;tag&gt; to extract the name from.
262    *
263    * @return                The value within the tag representing the node name.
264    *
265    */
266 
267   private static String nodeNameFromTag( String tag )
268   {
269     String trimTag = tag.trim();
270 
271     // we only want the text up to the first space
272     // ie: <mytag attr="something">
273     // we only want "<mytag"
274     int endIndex = trimTag.indexOf(" ");
275     if (endIndex == -1)
276     {
277       // if there was a "/>" adjust the end index to before it
278       endIndex = trimTag.indexOf("/>");
279       if (endIndex == -1)
280       {
281         // othewise set the end index to the last character ">"
282         endIndex = trimTag.length() - 1;
283       }
284     }
285 
286     // determine where to start
287     int startIndex = 1;
288     if (trimTag.startsWith("</"))
289     {
290       startIndex = 2;
291     }
292 
293     String nodeName = trimTag.substring(startIndex,endIndex);
294 
295     return nodeName;
296   }
297 
298   /**
299    * Create a new OsmNode from the given &lt;tag&gt;.
300    *
301    * @param    node          The {@link Osm} to create a new {@link OsmNode} from.
302    * @param    tag            The &lt;tag&gt; to create an {@link OsmNode} from.
303    *
304    * @return                The new {@link OsmNode}.
305    *
306    */
307 
308   private static OsmNode nodeFromTag( Osm osm, String tag )
309   {
310     String nodeName = nodeNameFromTag(tag);
311     OsmNode newNode = osm.createNode(nodeName, "");
312     return newNode;
313   }
314 
315   /**
316    * Extract the attributes from a &lt;tag&gt;.
317    *
318    * @param    node          The {@link OsmNode} to place the attributes into.
319    * @param    tag            The &lt;tag&gt; to extract the attributes from.
320    *
321    */
322 
323   private static void extractTagAttributes( OsmNode node, String tag )
324     throws Exception
325   {
326     // strip off the < and /> from the tag
327     tag = tag.replace('<',' ');
328     tag = tag.replace('>',' ');
329     tag = tag.trim();
330 
331     // now that the brackets are gone, remove the / trailing or leading
332 
333     // ie: </tag> --> /tag
334     if (tag.startsWith("/"))
335     {
336       tag = tag.substring(1);
337     }
338 
339     // ie: <tag/> --> tag/
340     if (tag.endsWith("/"))
341     {
342       tag = tag.substring(0, tag.length()-1);
343     }
344     
345     // trim all whitespace again
346     tag = tag.trim();
347 
348     // parse path
349     StringTokenizer tokens = new StringTokenizer(tag, " ", false);
350 
351     while (tokens.hasMoreTokens())
352     {
353       // get attribute
354       String attribute = tokens.nextToken();
355 
356       if (attribute.equals(node.getNodeName()))
357       {
358         continue;
359       }
360 
361       // get the attribute name
362       int index;
363 
364       index = attribute.indexOf("=\"");
365       if (index != -1)
366       {
367         String attrName = attribute.substring(0, index);
368 
369         // get the attribute value
370         int oldIndex = index + 2;
371         index = attribute.indexOf("\"", oldIndex);
372         if (index != -1)
373         {
374           String attrValue = attribute.substring(oldIndex, index);
375 
376           // set the attribute into the node
377           node.setAttribute( attrName, attrValue );
378         }
379         else
380         {
381           throw new Exception("Invalid attribute definition 1. Line "+currentLine+".");
382         }
383       }
384       else
385       {
386         throw new Exception("Invalid attribute definition 2. Line "+currentLine+".");
387       }
388     }
389   }
390 
391   /**
392    * Peeks into the read stream to see if the given string matches the same number of characters
393    * in the upcoming stream.
394    *
395    * @param    reader          The <code>Reader</code> stream to peek into.
396    * @param    match          The string to attempt to match with.
397    *
398    * @return                True if the string matches.
399    *
400    */
401 
402   private static boolean peekMatch( Reader reader, String match )
403     throws Exception
404   {
405     String value = "";
406     int len = match.length();
407 
408     char ch;
409     int index = 0;
410 
411     // mark the buffer so we can return to the correct location
412     reader.mark(len+1);
413 
414     while ((ch=(char)reader.read())!=-1 && index<len)
415     {
416       // if the end of the file was found
417       if (ch == -1)
418       {
419         return false;
420       }
421 
422       value += ch;
423       index++;
424     }
425 
426     // reset back to original location
427     reader.reset();
428 
429     return value.equals(match);
430   }
431 
432   /**
433    * Converts the special characters of a string into their secondary form.
434    *
435    * @param    data          The string to convert special characters of.
436    *
437    * @return                Alternative form of string.
438    *
439    */
440 
441   private static String convertSpecialCharacters( String data )
442   {
443     int i;
444 
445     for (i=0; i<specialCharacters.length; i++)
446     {
447       data = replaceAll(data, specialCharacters[i][0], specialCharacters[i][1]);
448     }
449 
450     return data;
451   }
452 
453   /**
454    * Replaces all the matching values of a string with their replacement value.
455    *
456    * @param    source          The string to replace the values of.
457    * @param    original        The original value to search for.
458    * @param    replace          The string to replace the original value with.
459    *
460    * @return                Updated string.
461    *
462    */
463 
464   private static String replaceAll( String source, String original, String replace )
465   {
466     int index;
467 
468     while ((index=source.indexOf(original)) != -1)
469     {
470       String start = source.substring(0, index);
471       String end = source.substring(index+original.length(), source.length());
472 
473       source = start + replace + end;
474     }
475 
476     return source;
477   }
478 }
479