Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/memoire/xml/XmlParser.java


1   /**
2    * @modification 2001-10-25
3    * @statut       unstable
4    * @file         XmlParser.java
5    * @version      0.04
6    * @author       Guillaume Desnoix
7    * @email        guillaume@desnoix.com
8    * @license      GNU General Public License 2 (GPL2)
9    * @copyright    1998-2001 Guillaume Desnoix
10   */
11  
12  package com.memoire.xml;
13  import  com.memoire.xml.*;
14  
15  import java.io.*;
16  import java.util.*;
17  
18  public class XmlParser
19         implements XmlListener
20  {
21    public static final int XML      =0;
22    public static final int HTML     =1;
23    public static final int TXT      =2;
24  
25    private Reader      in_;
26    private char[]      buf_;
27    private int         nbc_;
28    private int         lineno_,charno_;
29    private String      origin_;
30    private int         mode_;
31    private XmlListener listener_;
32  
33    public XmlParser(String _origin) throws IOException
34    {
35      this(new FileReader(_origin),_origin,XML);
36    }
37  
38    public XmlParser(String _origin,int _mode) throws IOException
39    {
40      this(new FileReader(_origin),_origin,_mode);
41    }
42  
43    public XmlParser(File _origin) throws IOException
44    {
45      this(new FileReader(_origin),_origin.getName(),XML);
46    }
47  
48    public XmlParser(File _origin, int _mode) throws IOException
49    {
50      this(new FileReader(_origin),_origin.getName(),_mode);
51    }
52  
53    public XmlParser(Reader _in, String _origin)
54    {
55      this(_in,_origin,XML);
56    }
57  
58    public XmlParser(Reader _in, String _origin, int _mode)
59    {
60      in_      =_in;
61      buf_     =new char[1024];
62      nbc_     =0;
63      origin_  =_origin;
64      lineno_  =1;
65      charno_  =1;
66      mode_    =_mode;
67      listener_=this;
68    }
69  
70    protected static final String replace(String _s, String _a, String _b)
71    {
72      String r=_s;
73      int i=0;
74      while((i=r.indexOf(_a,i))>=0)
75      {
76        r=r.substring(0,i)+_b+r.substring(i+_a.length());
77        i=i+_b.length();
78      }
79      return r;
80    }
81  
82    private boolean ready() throws IOException
83    {
84      while(in_.ready()&&isSpace(peekC())) readC();
85      return in_.ready();
86    }
87  
88    public int getMode()
89    {
90      return mode_;
91    }
92  
93    public void setMode(int _mode)
94    {
95      mode_=_mode;
96    }
97  
98    public String origin() { return origin_; }
99    public int    lineno() { return lineno_; }
100   public int    charno() { return charno_; }
101 
102   private char readC() throws IOException
103   {
104     char r;
105 
106     if(nbc_>0)
107     {
108       nbc_--;
109       r=buf_[nbc_];
110     }
111     else
112     {
113       int c=in_.read();
114       if(c==-1) throw new EOFException();
115       r=(char)c;
116       if(r=='\n') { charno_=1; lineno_++; }
117       else charno_++;
118     }
119 
120     return r;
121   }
122 
123   private char peekC()
124   {
125     char r;
126 
127     try { r=readC(); putC(r); }
128     catch(IOException ex) { r=(char)-1; }
129 
130     return r;
131   }
132     
133   private void putC(char _c)
134   {
135      buf_[nbc_]=_c;
136      nbc_++;
137   }
138 
139   private static final boolean isSpace(char c)
140   {
141     return Character.isWhitespace(c);
142   }
143 
144   private static final boolean isLetterOrDigit(char c)
145   {
146     return Character.isLetterOrDigit(c);
147   }
148 
149   private int tag=0;
150 
151   private String readToken0() throws IOException
152   {
153     char    c;
154     boolean espaces=false;
155     while(isSpace(c=readC())) espaces=true;
156 
157     StringBuffer sb=new StringBuffer(10);
158 
159     if(getMode()==TXT)
160     {
161       sb.append(c);
162       if(isLetterOrDigit(c))
163       {
164   while(isLetterOrDigit(c=peekC())||(c=='-')||(c=='_'))
165     sb.append(readC());
166         if(c=='@')
167   {
168     sb.append(readC());
169     while(isLetterOrDigit(c=peekC())||(c=='.')||(c=='-')||(c=='_'))
170       sb.append(readC());
171         }
172   else
173   if((c==':')&&
174      (  "http".equals(sb.toString())
175       ||"mailto".equals(sb.toString())
176       ||"ftp".equals(sb.toString())))
177     while(!isSpace(c=peekC()))
178       sb.append(readC());
179       }
180     }
181     else
182     if((tag==0)&&(c=='<'))
183     {
184       sb.append(c);
185       if(peekC()=='!')
186       {
187   sb.append(readC());
188   char t=readC();
189   if((t=='-')&&(peekC()=='-'))
190   {
191     putC(t);
192     do { sb.append(readC()); }
193     while(!sb.toString().endsWith("-->"));
194     tag--;
195   }
196   else putC(t);
197       }
198       else
199       if(peekC()=='?')
200   sb.append(readC());
201       else
202       if(peekC()=='/')
203   sb.append(readC());
204       tag++;
205     }
206     else
207     if((tag!=0)&&(c=='>'))
208     {
209       sb.append(c);
210       tag--;
211     }
212     else
213     if((tag!=0)&&((c=='?')||(c=='/'))&&(peekC()=='>'))
214     {
215       sb.append(c);
216       sb.append(readC());
217       tag--;
218     }
219     else
220     if(c=='&')
221     {
222       sb.append(c);
223       do { c=readC(); sb.append(c); } while(c!=';');
224     }
225     else
226     if((tag!=0)&&(c=='"'))
227     {
228       sb.append(c);
229       do { c=readC(); sb.append(c); } while(c!='"');
230     }
231     else
232     if((tag!=0)&&(c=='\''))
233     {
234       sb.append(c);
235       do { c=readC(); sb.append(c); } while(c!='\'');
236     }
237     else
238     if((tag!=0)&&(c=='['))
239     {
240       sb.append(c);
241       do { c=readC(); sb.append(c); } while(c!=']');
242     }
243     else
244     if((tag!=0)&&isLetterOrDigit(c))
245     {
246       sb.append(c);
247       while(isLetterOrDigit(c=peekC())
248       ||(c=='.')||(c=='-')||(c=='_')||(c==':'))
249   { sb.append(readC()); }
250 
251       if((getMode()==HTML)&&(peekC()=='%'))
252   { sb.append(readC()); }
253     }
254     else
255     if(tag!=0)
256     {
257       sb.append(c);
258 
259       if((getMode()==HTML)
260    &&((c=='#')||(c=='+')||(c=='-')))
261       {
262   while(isLetterOrDigit(c=peekC())
263         ||(c=='.')||(c=='-')||(c=='_')||(c==':'))
264   { sb.append(readC()); }
265       }
266     }
267     else
268     {
269       if((getMode()==HTML)&&espaces) sb.append(' ');
270       sb.append(c);
271       while(((c=peekC())!='<')&&(c!=-1)) sb.append(readC());
272     }
273 
274     //System.err.println("::"+tag+" "+sb);
275     return sb.toString();
276   }
277 
278   private Stack pile_=new Stack();
279 
280   private String readToken() throws IOException
281   {
282     String r;
283 
284     do
285     {
286       if(!pile_.empty()) r=(String)pile_.pop();
287       else               r=readToken0();
288     }
289     while(r.startsWith("<!--"));
290 
291     return r;
292   }
293 
294   private void pushToken(String _token)
295   {
296     pile_.push(_token);
297   }
298 
299   public void parse() throws IOException
300   {
301     try
302     {
303       pile_.removeAllElements();
304 
305       while(true)
306       {
307   String e=readToken();
308   listener_.location(origin_,lineno_,charno_);
309 
310   if(getMode()==TXT)
311   {
312     listener_.text(e);
313   }
314   else
315   if("<".equals(e))
316   {
317     e=readToken();
318     if(getMode()==HTML) e=e.toUpperCase();
319 
320     String t=e;
321 
322     while(true)
323     {
324       e=readToken();
325       if(">".equals(e)||"/>".equals(e)) break;
326 
327       String a=e;
328       e=readToken(); // must be '='
329 
330       if(getMode()==HTML)
331       {
332         if(!e.equals("="))
333         {
334     pushToken(e);
335     e="TRUE";
336         }
337         else
338     e=readToken();
339         //System.err.println(a+"="+e);
340       }
341       else
342       {
343         if(!e.equals("="))
344     listener_.error("Missing value for Attribute '"+a+"'");
345         else
346     e=readToken();
347       }
348 
349       if((getMode()==HTML)&&("#".equals(e)))
350         e+=readToken();
351         
352       if(e.length()>=2)
353       {
354         char c=e.charAt(0);
355         if((c=='"'||c=='\'')&&(c==e.charAt(e.length()-1)))
356     e=e.substring(1,e.length()-1);
357         else
358     if(getMode()!=HTML)
359       listener_.error("Invalid Attribute Value '"+a+"': "+e);
360       }
361       else
362       if(getMode()!=HTML)
363     listener_.error("Invalid Attribute Value '"+a+"': "+e);
364 
365       if(getMode()==HTML)  a=a.toUpperCase();
366       //System.err.println("ATT: "+a+" ~ "+e);
367       listener_.attribute(a,e);
368     }
369 
370     //if(getMode()==HTML) t=t.toUpperCase();
371     //System.err.println("OPEN TAG: "+t);
372     listener_.startElement(t);
373 
374     if(">".equals(e))
375     {
376       if((getMode()==HTML)
377          &&(  "BR"   .equals(t)
378       ||"HR"   .equals(t)
379       ||"IMG"  .equals(t)
380       ||"LINK" .equals(t)
381       ||"INPUT".equals(t)
382       ||"META" .equals(t)))
383         listener_.endElement(t);
384     }
385     else
386     if("/>".equals(e))
387     {
388       //System.err.println("CLOSE TAG: "+t);
389       listener_.endElement(t);
390     }
391   }
392   else
393   if("</".equals(e))
394   {
395     e=readToken();
396     if(getMode()==HTML) e=e.toUpperCase();
397     //System.err.println("CLOSE TAG: "+e);
398     if(  (getMode()==HTML)
399        &&(  "BR"   .equals(e)
400     ||"HR"   .equals(e)
401     ||"IMG"  .equals(e)
402     ||"LINK" .equals(e)
403     ||"INPUT".equals(e)
404     ||"META" .equals(e)))
405         ;
406     else
407       listener_.endElement(e);
408     e=readToken();
409   }
410   else
411   if("<?".equals(e))
412   {
413     //System.err.println("PROCESSING");
414     do { e=readToken(); } while(!"?>".equals(e));
415   }
416   else
417         if("<!".equals(e))
418         {
419     //System.err.println("DECLARATION");
420     do { e=readToken(); } while(!">".equals(e));
421   }
422   else
423   {
424     //System.err.println("TEXT: "+e);
425     listener_.text(e);
426   }
427       }
428     }
429     catch(EOFException ex) { }
430   }
431 
432   public void setXmlListener(XmlListener _l)
433   {
434     if(_l==null) _l=this;
435     listener_=_l;
436   }
437 
438   public void location(String _origin, int _lineno, int _charno)
439   { }
440 
441   public void startElement(String _tag)
442   { System.out.println("OPEN TAG  : "+_tag); }
443 
444   public void endElement(String _tag)
445   { System.out.println("CLOSE TAG : "+_tag); }
446 
447   public void attribute(String _name,String _value)
448   { System.out.println("ATTRIBUTE : "+_name+" WITH VALUE : "+_value); }
449 
450   public void text(String _data)
451   { System.out.println("TEXT DATA : "+_data); }
452 
453   public void error(String _message)
454   { System.out.println("ERROR     : "+_message+" IN "+origin()); }
455 
456   public static void main(String[] _args)
457   {
458     for(int i=0;i<_args.length;i++)
459     {
460       try
461       {
462         String f=_args[i];
463   int    m=XML;
464   if(f.endsWith(".html")||f.endsWith(".htm")) m=HTML;
465   if(f.endsWith(".txt"))                      m=TXT;
466   System.err.println("FILE: "+f+" MODE: "+m);
467   XmlParser p=new XmlParser(f,m);
468   p.parse();
469       }
470       catch(Exception ex)
471       {
472   ex.printStackTrace();
473   //System.err.println(ex);
474       }
475     }
476   }
477 }
478