Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/hp/hpl/jena/rdf/arp/NTriple.java


1   /*
2    *  (c) Copyright 2001, 2002, 2003, 2004, 2005 Hewlett-Packard Development Company, LP
3    *  All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions
7    * are met:
8    * 1. Redistributions of source code must retain the above copyright
9    *    notice, this list of conditions and the following disclaimer.
10   * 2. Redistributions in binary form must reproduce the above copyright
11   *    notice, this list of conditions and the following disclaimer in the
12   *    documentation and/or other materials provided with the distribution.
13   * 3. The name of the author may not be used to endorse or promote products
14   *    derived from this software without specific prior written permission.
15  
16   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26   
27   * * $Id: NTriple.java,v 1.14 2005/02/21 12:09:15 andy_seaborne Exp $
28     
29     AUTHOR:  Jeremy J. Carroll
30  */
31  /*
32   * XML2NTriple.java
33   *
34   * Created on July 13, 2001, 10:06 PM
35   */
36  
37  package com.hp.hpl.jena.rdf.arp;
38  
39  import java.net.*;
40  import java.io.*;
41  
42  import org.xml.sax.*;
43  /** A command line interface into ARP.
44   * Creates NTriple's or just error messages.
45   * <pre>
46   * java &lt;class-path&gt; com.hp.hpl.jena.rdf.arp.NTriple ( [ -[xstfurn]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )...
47   * </pre>
48   * <p>
49   * &lt;class-path&gt; should contain <code>jena.jar</code>, <code>xerces.jar</code>,  and <code>icu4j.jar</code> or equivalents.
50   * </p>
51   * All options, files and URLs can be intermingled in any order.
52   * They are processed from left-to-right.
53   * <dl><dt>
54   * file    </dt><dd>  Converts (embedded) RDF in XML file into N-triples
55   * </dd><dt>
56   * url  </dt><dd>     Converts (embedded) RDF from URL into N-triples
57   * </dd><dt>
58   * -b uri </dt><dd>   Sets XML Base to the absolute URI.
59   * </dd><dt>
60   * -r    </dt><dd>    Content is RDF (no embedding, rdf:RDF tag may be omitted).
61   * </dd><dt>
62   * -t  </dt><dd>      No n-triple output, error checking only.
63   * </dd><dt>
64   * -x   </dt><dd>     Lax mode - warnings are suppressed.
65   * </dd><dt>
66   * -s    </dt><dd>    Strict mode - most warnings are errors.
67   * </dd><dt>
68   * -u     </dt><dd>   Allow unqualified attributes (defaults to warning).
69   * </dd><dt>
70   * -f    </dt><dd>    All errors are fatal - report first one only.
71   * </dd><dt>
72   * -n    </dt><dd>    Show line numbers of each triple.
73   * </dd><dt>
74   * -b url </dt><dd>   Sets XML Base to the absolute url.
75   * </dd><dt>
76   * -e NNN[,NNN...]</dt><dd>
77   * Treats numbered warning conditions as errors.
78   * </dd><dt>
79   * -w NNN[,NNN...]</dt><dd>
80   * Treats numbered error conditions as warnings.
81   * </dd><dt>
82   * -i NNN[,NNN...]
83   * </dt><dd>
84   * Ignores numbered error/warning conditions.
85   * </dd></dl>
86   * @author jjc
87   */
88  public class NTriple implements ARPErrorNumbers {
89  
90    private static StringBuffer line = new StringBuffer(); // output buffer for the current line; SH prints it and resets it to length 0 after each statement
91    private static ARP arp; // parser instance; (re)created at the start of every mainEh call
92    private static String xmlBase = null; // set by the -b option; when non-null it overrides the per-input base URI
93    private static boolean numbers = false; // set by the -n option; makes lineNumber() emit a "# systemId:line(col)" comment
94    /** Starts an RDF/XML to NTriple converter.
95     * @param args The command-line arguments.
96     */
97    static public void main(String args[]) {
98      mainEh(args, null, null);
99    }
100   static StatementHandler andMeToo = null; // optional second statement receiver; assigned from mainEh's ap argument and combined with the main handler by getSH when non-null
101   /** Starts an RDF/XML to NTriple converter,
102    * using an error handler, and an ARPHandler.
103    * Statements get processed both by this class,
104    * and by the passed in StatementHandler
105    * @param args The command-line arguments.
106    * @param eh Can be null.
107    * @param ap Can be null.
108    */
109   static public void mainEh(String args[], ErrorHandler eh, ARPEventHandler ap) {
110     boolean doneOne = false;
111     startMem = -1;
112     andMeToo = ap;
113     //SH sh = new SH();
114     int i;
115     arp = new ARP();
116     ARPHandlers handlers = arp.getHandlers();
117     handlers.setStatementHandler(getSH(true));
118     if (ap != null) {
119       handlers.setNamespaceHandler(ap);
120       handlers.setExtendedHandler(ap);
121     }
122     if (eh != null)
123       handlers.setErrorHandler(eh);
124 
125     for (i = 0; i < args.length - 1; i++) {
126       if (args[i].startsWith("-")) {
127         i += processOpts(args[i].substring(1), args[i + 1]);
128       } else {
129         doneOne = true;
130         process(args[i]);
131       }
132     }
133     if (args.length > 0) {
134       if (args[i].startsWith("-")) {
135         if (doneOne || processOpts(args[i].substring(1), "100") == 1)
136           usage();
137       } else {
138         doneOne = true;
139         process(args[i]);
140       }
141     }
142     if (!doneOne) {
143       process(System.in, "http://example.org/stdin", "standard input");
144     }
145     if ( startMem != -1) {
146       rt.gc();
147       System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
148       rt.gc();
149       System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
150       rt.gc();
151       System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
152       rt.gc();
153       System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
154     }
155   }
156 
157   /**
158    * @param b false for quiet.
159    * @return
160    */
161   private static StatementHandler getSH(boolean b) {
162     StatementHandler rslt = b?(StatementHandler)new SH():new NoSH();
163     if (andMeToo!=null)
164       rslt = new TwoSH(rslt,andMeToo);
165     return rslt;
166   }
167 
168   static private void lineNumber() {
169     if (numbers) {
170       Locator locator = arp.getLocator();
171       if (locator != null)
172         print(
173           "# "
174             + locator.getSystemId()
175             + ":"
176             + locator.getLineNumber()
177             + "("
178             + locator.getColumnNumber()
179             + ")\n");
180     }
181   }
182 
183   /*
184    * Options:
185    *  -x   Lax, Warnings suppressed
186    *  -s   Strict, Warnings are errors
187    *  -f   All errors are fatal.
188    *  -u   Suppress unqualified attribute warnings
189    *  -t   Error checking only, no n-triple output
190    *  -b:  set xml:base (same for all files?)
191    *  -e:  convert numbered warnings to errors
192    *  -i:  suppress numbered warnings
193    *  -w:  convert numbered errors/suppressed warnings to warnings
194    *  -n:  give line numbers
195    *
196    */
197   static void usage() {
198     System.err.println(
199       "java <class-path> "
200         + NTriple.class.getName()
201         + " ( [ -[xstfu]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )... ");
202     System.err.println(
203       "    All options, files and URLs can be intemingled in any order.");
204     System.err.println("    They are processed from left-to-right.");
205     System.err.println(
206       "    file      Converts (embedded) RDF in XML file into N-triples");
207     System.err.println(
208       "    url       Converts (embedded) RDF from URL into N-triples");
209     System.err.println("    -b uri    Sets XML Base to the absolute URI.");
210     System.err.println(
211       "    -r        Content is RDF (no embedding, rdf:RDF tag may be omitted).");
212     System.err.println(
213       "    -t        No n-triple output, error checking only.");
214     System.err.println("    -x        Lax mode - warnings are suppressed.");
215     System.err.println(
216       "    -s        Strict mode - most warnings are errors.");
217     System.err.println("    -n        Show line and column numbers.");
218     System.err.println(
219       "    -u        Allow unqualified attributes (defaults to warning).");
220     System.err.println(
221       "    -f        All errors are fatal - report first one only.");
222     System.err.println("    -b url    Sets XML Base to the absolute url.");
223     System.err.println("    -e NNN[,NNN...]");
224     System.err.println(
225       "              Treats numbered warning conditions as errrors.");
226     System.err.println("    -w NNN[,NNN...]");
227     System.err.println(
228       "              Treats numbered error conditions as warnings.");
229     System.err.println("    -i NNN[,NNN...]");
230     System.err.println(
231       "              Ignores numbered error/warning conditions.");
232     System.exit(1);
233   }
234   static final private Runtime rt = Runtime.getRuntime(); // used for the gc() calls and memory readings of the -D debugging option
235     static private int startMem = -1; // baseline used-memory reading taken by -D; -1 means no measurement is active (mainEh then skips its memory report)
236   static private int processOpts(String opts, String nextArg) {
237     boolean usedNext = false;
238     ARPOptions options = arp.getOptions();
239     for (int i = 0; i < opts.length(); i++) {
240       char opt = opts.charAt(i);
241       if ("beiwD".indexOf(opt) != -1) {
242         if (usedNext)
243           usage();
244         usedNext = true;
245       }
246       switch (opt) {
247         case 'D':
248           final int nStatements = Integer.parseInt(nextArg);
249  rt.gc(); rt.gc(); 
250  startMem = (int)(rt.totalMemory()-rt.freeMemory());
251         arp.getHandlers().setStatementHandler(new StatementHandler(){
252 int debugC = 0;
253 
254           public void statement(AResource subj, AResource pred, AResource obj) {
255             statement(null,null,(ALiteral)null);
256             
257           }
258 
259           public void statement(AResource subj, AResource pred, ALiteral lit) {
260             if (++debugC%100 == 0) {
261               System.out.println("T: " + debugC);
262               rt.gc();
263               System.out.println("M1: "+ (rt.totalMemory()-rt.freeMemory()-startMem));
264               rt.gc();
265               System.out.println("M2: " + (rt.totalMemory()-rt.freeMemory()-startMem));
266             } 
267             if ( debugC == 1 ){
268               rt.gc(); rt.gc(); 
269               startMem = (int)(rt.totalMemory()-rt.freeMemory());
270             }
271             if (debugC == nStatements) {
272 
273                 rt.gc();
274                 System.err.println("Kill me now.");
275               try {
276                 Thread.sleep(200000);
277               }
278               catch (Exception e){
279               }
280               
281             }
282               
283             
284           }
285         });
286           break;
287         case 'x' :
288           options.setLaxErrorMode();
289           break;
290         case 's' :
291           options.setStrictErrorMode();
292           break;
293         case 't' :
294           arp.getHandlers().setStatementHandler(getSH(false));
295           break;
296         case 'r' :
297           options.setEmbedding(false);
298           break;
299         case 'n' :
300           numbers = true;
301           break;
302         case 'b' :
303           xmlBase = nextArg;
304           break;
305         case 'e' :
306           setErrorMode(nextArg, EM_ERROR);
307           break;
308         case 'i' :
309           setErrorMode(nextArg, EM_IGNORE);
310           break;
311         case 'w' :
312           setErrorMode(nextArg, EM_WARNING);
313           break;
314         case 'f' :
315           for (int j = 0; j < 400; j++) {
316             if (options.setErrorMode(j, -1) == EM_ERROR)
317               options.setErrorMode(j, EM_FATAL);
318           }
319           break;
320         case 'u' :
321           options.setErrorMode(WARN_UNQUALIFIED_ATTRIBUTE, EM_IGNORE);
322           options.setErrorMode(WARN_UNQUALIFIED_RDF_ATTRIBUTE, EM_IGNORE);
323           break;
324         default :
325           usage();
326       }
327     }
328     return usedNext ? 1 : 0;
329   }
330 
331   static private void setErrorMode(String numbers, int mode) {
332     int n[] = new int[3];
333     int j = 0;
334     numbers += ",";
335     for (int i = 0; i < numbers.length(); i++) {
336       char c = numbers.charAt(i);
337       switch (c) {
338         case '0' :
339         case '1' :
340         case '2' :
341         case '3' :
342         case '4' :
343         case '5' :
344         case '6' :
345         case '7' :
346         case '8' :
347         case '9' :
348           if (j == 3)
349             usage();
350           n[j++] = c - '0';
351           break;
352         case ' ' :
353         case ';' :
354         case ',' :
355           if (i == 0)
356             usage();
357           switch (j) {
358             case 0 :
359               break;
360             case 3 :
361               arp.getOptions().setErrorMode(
362                 n[0] * 100 + n[1] * 10 + n[2],
363                 mode);
364               j = 0;
365               break;
366             default :
367               usage();
368           }
369           break;
370         default :
371           usage();
372       }
373     }
374   }
375 
376   static private void process(String surl) {
377     InputStream in;
378     String xmlBase;
379 
380     URL url;
381     String baseURL;
382 
383     try {
384       File ff = new File(surl);
385       in = new FileInputStream(ff);
386       url = ff.toURL();
387       baseURL = url.toExternalForm();
388       if (baseURL.startsWith("file:/")
389         && !baseURL.startsWith("file://")) {
390         baseURL = "file://" + baseURL.substring(5);
391       }
392     } catch (Exception ignore) {
393       try {
394         url = new URL(surl);
395         in = url.openStream();
396         baseURL = url.toExternalForm();
397       } catch (Exception e) {
398         System.err.println("ARP: Failed to open: " + surl);
399         System.err.println(
400           "    " + ParseException.formatMessage(ignore));
401         System.err.println("    " + ParseException.formatMessage(e));
402         return;
403       }
404     }
405     process(in, baseURL, surl);
406   }
407   static private void process(InputStream in, String xmlBasex, String surl) {
408     String xmlBasey = xmlBase == null ? xmlBasex : xmlBase;
409     try {
410       arp.load(in, xmlBasey);
411     } catch (IOException e) {
412       System.err.println(
413         "Error: " + surl + ": " + ParseException.formatMessage(e));
414     } catch (SAXParseException e) {
415       // already reported.
416     } catch (SAXException sax) {
417       System.err.println(
418         "Error: " + surl + ": " + ParseException.formatMessage(sax));
419     }
420   }
421   private static class TwoSH implements StatementHandler {
422     final StatementHandler a, b;
423     public void statement(AResource subj, AResource pred, AResource obj) {
424       a.statement(subj, pred, obj);
425       b.statement(subj, pred, obj);
426     }
427     public void statement(AResource subj, AResource pred, ALiteral lit) {
428       a.statement(subj, pred, lit);
429       b.statement(subj, pred, lit);
430     }
431     TwoSH(StatementHandler A, StatementHandler B) {
432       a = A;
433       b = B;
434     }
435   }
436   private static class NoSH implements StatementHandler {
437     private int ix = 0;
438     private void userData(AResource n){
439         if (n.isAnonymous()) {
440             n.setUserData(new Integer(ix++));
441         }
442     }
443       public void statement(AResource subj, AResource pred, AResource obj) {
444         userData(subj);
445         userData(pred);
446         userData(obj);
447     }
448     public void statement(AResource subj, AResource pred, ALiteral lit) {
449         userData(subj);
450         userData(pred);
451         }
452   }
453   private static class SH implements StatementHandler {
454     public void statement(AResource subj, AResource pred, AResource obj) {
455       lineNumber();
456       resource(subj);
457       resource(pred);
458       resource(obj);
459       line.append('.');
460       System.out.println(line);
461       line.setLength(0);
462     }
463     public void statement(AResource subj, AResource pred, ALiteral lit) {
464       String lang = lit.getLang();
465       String parseType = lit.getParseType();
466       lineNumber();
467       /*
468       if (parseType != null) {
469         System.out.print("# ");
470         if (parseType != null)
471           System.out.print("'" + parseType + "'");
472         System.out.println();
473       }
474       */
475       resource(subj);
476       resource(pred);
477       literal(lit);
478       line.append('.');
479       System.out.println(line);
480       line.setLength(0);
481     }
482   }
483   static private void print(String s) {
484     line.append(s);
485   }
486   static private void resource(AResource r) {
487     if (r.isAnonymous()) {
488       print("_:j");
489       print(r.getAnonymousID());
490       print(" ");
491     } else {
492       print("<");
493       escapeURI(r.getURI());
494       print("> ");
495     }
496   }
497   static private void escape(String s) {
498     int lg = s.length();
499     for (int i = 0; i < lg; i++) {
500       char ch = s.charAt(i);
501       switch (ch) {
502         case '\\' :
503           print("\\\\");
504           break;
505         case '"' :
506           print("\\\"");
507           break;
508         case '\n' :
509           print("\\n");
510           break;
511         case '\r' :
512           print("\\r");
513           break;
514         case '\t' :
515           print("\\t");
516           break;
517         default :
518           if (ch >= 32 && ch <= 126)
519             line.append(ch);
520           else {
521             print("\\u");
522             String hexstr = Integer.toHexString(ch).toUpperCase();
523             int pad = 4 - hexstr.length();
524 
525             for (; pad > 0; pad--)
526               print("0");
527             print(hexstr);
528           }
529       }
530     }
531   }
532 
533   static private boolean okURIChars[] = new boolean[128];
534   static {
535     for (int i = 32; i < 127; i++)
536       okURIChars[i] = true;
537     okURIChars['<'] = false;
538     okURIChars['>'] = false;
539     okURIChars['\\'] = false;
540 
541   }
542   static private void escapeURI(String s) {
543     int lg = s.length();
544     for (int i = 0; i < lg; i++) {
545       char ch = s.charAt(i);
546       if (ch < okURIChars.length && okURIChars[ch]) {
547         line.append(ch);
548       } else {
549         print("\\u");
550         String hexstr = Integer.toHexString(ch).toUpperCase();
551         int pad = 4 - hexstr.length();
552 
553         for (; pad > 0; pad--)
554           print("0");
555         print(hexstr);
556       }
557     }
558   }
559   static private void literal(ALiteral l) {
560     //if (l.isWellFormedXML())
561     //  System.out.print("xml");
562     line.append('"');
563     escape(l.toString());
564     line.append('"');
565     String lang = l.getLang();
566     if (lang != null && !lang.equals("")) {
567       line.append('@');
568       print(lang);
569     }
570     String dt = l.getDatatypeURI();
571     if (dt != null && !dt.equals("")) {
572       print("^^<");
573       escapeURI(dt);
574       line.append('>');
575     }
576 
577     line.append(' ');
578   }
579 
580 }