Source code: com/hp/hpl/jena/rdf/arp/NTriple.java
1 /*
2 * (c) Copyright 2001, 2002, 2003, 2004, 2005 Hewlett-Packard Development Company, LP
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 * * $Id: NTriple.java,v 1.14 2005/02/21 12:09:15 andy_seaborne Exp $
28
29 AUTHOR: Jeremy J. Carroll
30 */
31 /*
32 * XML2NTriple.java
33 *
34 * Created on July 13, 2001, 10:06 PM
35 */
36
37 package com.hp.hpl.jena.rdf.arp;
38
39 import java.net.*;
40 import java.io.*;
41
42 import org.xml.sax.*;
43 /** A command line interface into ARP.
44 * Creates NTriple's or just error messages.
45 * <pre>
46 * java &lt;class-path&gt; com.hp.hpl.jena.rdf.arp.NTriple ( [ -[xstfurn]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )...
47 * </pre>
48 * <p>
49 * &lt;class-path&gt; should contain <code>jena.jar</code>, <code>xerces.jar</code>, and <code>icu4j.jar</code> or equivalents.
50 * </p>
51 * All options, files and URLs can be intermingled in any order.
52 * They are processed from left-to-right.
53 * <dl>
54 * <dt> file </dt><dd> Converts (embedded) RDF in XML file into N-triples
55 * </dd><dt>
56 * url </dt><dd> Converts (embedded) RDF from URL into N-triples
57 * </dd><dt>
58 * -b uri </dt><dd> Sets XML Base to the absolute URI.
59 * </dd><dt>
60 * -r </dt><dd> Content is RDF (no embedding, rdf:RDF tag may be omitted).
61 * </dd><dt>
62 * -t </dt><dd> No n-triple output, error checking only.
63 * </dd><dt>
64 * -x </dt><dd> Lax mode - warnings are suppressed.
65 * </dd><dt>
66 * -s </dt><dd> Strict mode - most warnings are errors.
67 * </dd><dt>
68 * -u </dt><dd> Allow unqualified attributes (defaults to warning).
69 * </dd><dt>
70 * -f </dt><dd> All errors are fatal - report first one only.
71 * </dd><dt>
72 * -n </dt><dd> Show line numbers of each triple.
73 * </dd><dt>
74 * -b url </dt><dd> Sets XML Base to the absolute url.
75 * </dd><dt>
76 * -e NNN[,NNN...]</dt><dd>
77 * Treats numbered warning conditions as errors.
78 * </dd><dt>
79 * -w NNN[,NNN...]</dt><dd>
80 * Treats numbered error conditions as warnings.
81 * </dd><dt>
82 * -i NNN[,NNN...]
83 * </dt><dd>
84 * Ignores numbered error/warning conditions.
85 * </dd></dl>
86 * @author jjc
87 */
88 public class NTriple implements ARPErrorNumbers {
89
90 private static StringBuffer line = new StringBuffer();
91 private static ARP arp;
92 private static String xmlBase = null;
93 private static boolean numbers = false;
94 /** Starts an RDF/XML to NTriple converter.
95 * @param args The command-line arguments.
96 */
97 static public void main(String args[]) {
98 mainEh(args, null, null);
99 }
100 static StatementHandler andMeToo = null;
101 /** Starts an RDF/XML to NTriple converter,
102 * using an error handler, and an ARPHandler.
103 * Statements get processed both by this class,
104 * and by the passed in StatementHandler
105 * @param args The command-line arguments.
106 * @param eh Can be null.
107 * @param ap Can be null.
108 */
109 static public void mainEh(String args[], ErrorHandler eh, ARPEventHandler ap) {
110 boolean doneOne = false;
111 startMem = -1;
112 andMeToo = ap;
113 //SH sh = new SH();
114 int i;
115 arp = new ARP();
116 ARPHandlers handlers = arp.getHandlers();
117 handlers.setStatementHandler(getSH(true));
118 if (ap != null) {
119 handlers.setNamespaceHandler(ap);
120 handlers.setExtendedHandler(ap);
121 }
122 if (eh != null)
123 handlers.setErrorHandler(eh);
124
125 for (i = 0; i < args.length - 1; i++) {
126 if (args[i].startsWith("-")) {
127 i += processOpts(args[i].substring(1), args[i + 1]);
128 } else {
129 doneOne = true;
130 process(args[i]);
131 }
132 }
133 if (args.length > 0) {
134 if (args[i].startsWith("-")) {
135 if (doneOne || processOpts(args[i].substring(1), "100") == 1)
136 usage();
137 } else {
138 doneOne = true;
139 process(args[i]);
140 }
141 }
142 if (!doneOne) {
143 process(System.in, "http://example.org/stdin", "standard input");
144 }
145 if ( startMem != -1) {
146 rt.gc();
147 System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
148 rt.gc();
149 System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
150 rt.gc();
151 System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
152 rt.gc();
153 System.out.println(rt.totalMemory()-rt.freeMemory()-startMem);
154 }
155 }
156
157 /**
158 * @param b false for quiet.
159 * @return
160 */
161 private static StatementHandler getSH(boolean b) {
162 StatementHandler rslt = b?(StatementHandler)new SH():new NoSH();
163 if (andMeToo!=null)
164 rslt = new TwoSH(rslt,andMeToo);
165 return rslt;
166 }
167
168 static private void lineNumber() {
169 if (numbers) {
170 Locator locator = arp.getLocator();
171 if (locator != null)
172 print(
173 "# "
174 + locator.getSystemId()
175 + ":"
176 + locator.getLineNumber()
177 + "("
178 + locator.getColumnNumber()
179 + ")\n");
180 }
181 }
182
183 /*
184 * Options:
185 * -x Lax, Warnings suppressed
186 * -s Strict, Warnings are errors
187 * -f All errors are fatal.
188 * -u Suppress unqualified attribute warnings
189 * -t Error checking only, no n-triple output
190 * -b: set xml:base (same for all files?)
191 * -e: convert numbered warnings to errors
192 * -i: suppress numbered warnings
193 * -w: convert numbered errors/suppressed warnings to warnings
194 * -n: give line numbers
195 *
196 */
197 static void usage() {
198 System.err.println(
199 "java <class-path> "
200 + NTriple.class.getName()
201 + " ( [ -[xstfu]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )... ");
202 System.err.println(
203 " All options, files and URLs can be intemingled in any order.");
204 System.err.println(" They are processed from left-to-right.");
205 System.err.println(
206 " file Converts (embedded) RDF in XML file into N-triples");
207 System.err.println(
208 " url Converts (embedded) RDF from URL into N-triples");
209 System.err.println(" -b uri Sets XML Base to the absolute URI.");
210 System.err.println(
211 " -r Content is RDF (no embedding, rdf:RDF tag may be omitted).");
212 System.err.println(
213 " -t No n-triple output, error checking only.");
214 System.err.println(" -x Lax mode - warnings are suppressed.");
215 System.err.println(
216 " -s Strict mode - most warnings are errors.");
217 System.err.println(" -n Show line and column numbers.");
218 System.err.println(
219 " -u Allow unqualified attributes (defaults to warning).");
220 System.err.println(
221 " -f All errors are fatal - report first one only.");
222 System.err.println(" -b url Sets XML Base to the absolute url.");
223 System.err.println(" -e NNN[,NNN...]");
224 System.err.println(
225 " Treats numbered warning conditions as errrors.");
226 System.err.println(" -w NNN[,NNN...]");
227 System.err.println(
228 " Treats numbered error conditions as warnings.");
229 System.err.println(" -i NNN[,NNN...]");
230 System.err.println(
231 " Ignores numbered error/warning conditions.");
232 System.exit(1);
233 }
234 static final private Runtime rt = Runtime.getRuntime();
235 static private int startMem = -1;
236 static private int processOpts(String opts, String nextArg) {
237 boolean usedNext = false;
238 ARPOptions options = arp.getOptions();
239 for (int i = 0; i < opts.length(); i++) {
240 char opt = opts.charAt(i);
241 if ("beiwD".indexOf(opt) != -1) {
242 if (usedNext)
243 usage();
244 usedNext = true;
245 }
246 switch (opt) {
247 case 'D':
248 final int nStatements = Integer.parseInt(nextArg);
249 rt.gc(); rt.gc();
250 startMem = (int)(rt.totalMemory()-rt.freeMemory());
251 arp.getHandlers().setStatementHandler(new StatementHandler(){
252 int debugC = 0;
253
254 public void statement(AResource subj, AResource pred, AResource obj) {
255 statement(null,null,(ALiteral)null);
256
257 }
258
259 public void statement(AResource subj, AResource pred, ALiteral lit) {
260 if (++debugC%100 == 0) {
261 System.out.println("T: " + debugC);
262 rt.gc();
263 System.out.println("M1: "+ (rt.totalMemory()-rt.freeMemory()-startMem));
264 rt.gc();
265 System.out.println("M2: " + (rt.totalMemory()-rt.freeMemory()-startMem));
266 }
267 if ( debugC == 1 ){
268 rt.gc(); rt.gc();
269 startMem = (int)(rt.totalMemory()-rt.freeMemory());
270 }
271 if (debugC == nStatements) {
272
273 rt.gc();
274 System.err.println("Kill me now.");
275 try {
276 Thread.sleep(200000);
277 }
278 catch (Exception e){
279 }
280
281 }
282
283
284 }
285 });
286 break;
287 case 'x' :
288 options.setLaxErrorMode();
289 break;
290 case 's' :
291 options.setStrictErrorMode();
292 break;
293 case 't' :
294 arp.getHandlers().setStatementHandler(getSH(false));
295 break;
296 case 'r' :
297 options.setEmbedding(false);
298 break;
299 case 'n' :
300 numbers = true;
301 break;
302 case 'b' :
303 xmlBase = nextArg;
304 break;
305 case 'e' :
306 setErrorMode(nextArg, EM_ERROR);
307 break;
308 case 'i' :
309 setErrorMode(nextArg, EM_IGNORE);
310 break;
311 case 'w' :
312 setErrorMode(nextArg, EM_WARNING);
313 break;
314 case 'f' :
315 for (int j = 0; j < 400; j++) {
316 if (options.setErrorMode(j, -1) == EM_ERROR)
317 options.setErrorMode(j, EM_FATAL);
318 }
319 break;
320 case 'u' :
321 options.setErrorMode(WARN_UNQUALIFIED_ATTRIBUTE, EM_IGNORE);
322 options.setErrorMode(WARN_UNQUALIFIED_RDF_ATTRIBUTE, EM_IGNORE);
323 break;
324 default :
325 usage();
326 }
327 }
328 return usedNext ? 1 : 0;
329 }
330
331 static private void setErrorMode(String numbers, int mode) {
332 int n[] = new int[3];
333 int j = 0;
334 numbers += ",";
335 for (int i = 0; i < numbers.length(); i++) {
336 char c = numbers.charAt(i);
337 switch (c) {
338 case '0' :
339 case '1' :
340 case '2' :
341 case '3' :
342 case '4' :
343 case '5' :
344 case '6' :
345 case '7' :
346 case '8' :
347 case '9' :
348 if (j == 3)
349 usage();
350 n[j++] = c - '0';
351 break;
352 case ' ' :
353 case ';' :
354 case ',' :
355 if (i == 0)
356 usage();
357 switch (j) {
358 case 0 :
359 break;
360 case 3 :
361 arp.getOptions().setErrorMode(
362 n[0] * 100 + n[1] * 10 + n[2],
363 mode);
364 j = 0;
365 break;
366 default :
367 usage();
368 }
369 break;
370 default :
371 usage();
372 }
373 }
374 }
375
376 static private void process(String surl) {
377 InputStream in;
378 String xmlBase;
379
380 URL url;
381 String baseURL;
382
383 try {
384 File ff = new File(surl);
385 in = new FileInputStream(ff);
386 url = ff.toURL();
387 baseURL = url.toExternalForm();
388 if (baseURL.startsWith("file:/")
389 && !baseURL.startsWith("file://")) {
390 baseURL = "file://" + baseURL.substring(5);
391 }
392 } catch (Exception ignore) {
393 try {
394 url = new URL(surl);
395 in = url.openStream();
396 baseURL = url.toExternalForm();
397 } catch (Exception e) {
398 System.err.println("ARP: Failed to open: " + surl);
399 System.err.println(
400 " " + ParseException.formatMessage(ignore));
401 System.err.println(" " + ParseException.formatMessage(e));
402 return;
403 }
404 }
405 process(in, baseURL, surl);
406 }
407 static private void process(InputStream in, String xmlBasex, String surl) {
408 String xmlBasey = xmlBase == null ? xmlBasex : xmlBase;
409 try {
410 arp.load(in, xmlBasey);
411 } catch (IOException e) {
412 System.err.println(
413 "Error: " + surl + ": " + ParseException.formatMessage(e));
414 } catch (SAXParseException e) {
415 // already reported.
416 } catch (SAXException sax) {
417 System.err.println(
418 "Error: " + surl + ": " + ParseException.formatMessage(sax));
419 }
420 }
421 private static class TwoSH implements StatementHandler {
422 final StatementHandler a, b;
423 public void statement(AResource subj, AResource pred, AResource obj) {
424 a.statement(subj, pred, obj);
425 b.statement(subj, pred, obj);
426 }
427 public void statement(AResource subj, AResource pred, ALiteral lit) {
428 a.statement(subj, pred, lit);
429 b.statement(subj, pred, lit);
430 }
431 TwoSH(StatementHandler A, StatementHandler B) {
432 a = A;
433 b = B;
434 }
435 }
436 private static class NoSH implements StatementHandler {
437 private int ix = 0;
438 private void userData(AResource n){
439 if (n.isAnonymous()) {
440 n.setUserData(new Integer(ix++));
441 }
442 }
443 public void statement(AResource subj, AResource pred, AResource obj) {
444 userData(subj);
445 userData(pred);
446 userData(obj);
447 }
448 public void statement(AResource subj, AResource pred, ALiteral lit) {
449 userData(subj);
450 userData(pred);
451 }
452 }
453 private static class SH implements StatementHandler {
454 public void statement(AResource subj, AResource pred, AResource obj) {
455 lineNumber();
456 resource(subj);
457 resource(pred);
458 resource(obj);
459 line.append('.');
460 System.out.println(line);
461 line.setLength(0);
462 }
463 public void statement(AResource subj, AResource pred, ALiteral lit) {
464 String lang = lit.getLang();
465 String parseType = lit.getParseType();
466 lineNumber();
467 /*
468 if (parseType != null) {
469 System.out.print("# ");
470 if (parseType != null)
471 System.out.print("'" + parseType + "'");
472 System.out.println();
473 }
474 */
475 resource(subj);
476 resource(pred);
477 literal(lit);
478 line.append('.');
479 System.out.println(line);
480 line.setLength(0);
481 }
482 }
483 static private void print(String s) {
484 line.append(s);
485 }
486 static private void resource(AResource r) {
487 if (r.isAnonymous()) {
488 print("_:j");
489 print(r.getAnonymousID());
490 print(" ");
491 } else {
492 print("<");
493 escapeURI(r.getURI());
494 print("> ");
495 }
496 }
497 static private void escape(String s) {
498 int lg = s.length();
499 for (int i = 0; i < lg; i++) {
500 char ch = s.charAt(i);
501 switch (ch) {
502 case '\\' :
503 print("\\\\");
504 break;
505 case '"' :
506 print("\\\"");
507 break;
508 case '\n' :
509 print("\\n");
510 break;
511 case '\r' :
512 print("\\r");
513 break;
514 case '\t' :
515 print("\\t");
516 break;
517 default :
518 if (ch >= 32 && ch <= 126)
519 line.append(ch);
520 else {
521 print("\\u");
522 String hexstr = Integer.toHexString(ch).toUpperCase();
523 int pad = 4 - hexstr.length();
524
525 for (; pad > 0; pad--)
526 print("0");
527 print(hexstr);
528 }
529 }
530 }
531 }
532
533 static private boolean okURIChars[] = new boolean[128];
534 static {
535 for (int i = 32; i < 127; i++)
536 okURIChars[i] = true;
537 okURIChars['<'] = false;
538 okURIChars['>'] = false;
539 okURIChars['\\'] = false;
540
541 }
542 static private void escapeURI(String s) {
543 int lg = s.length();
544 for (int i = 0; i < lg; i++) {
545 char ch = s.charAt(i);
546 if (ch < okURIChars.length && okURIChars[ch]) {
547 line.append(ch);
548 } else {
549 print("\\u");
550 String hexstr = Integer.toHexString(ch).toUpperCase();
551 int pad = 4 - hexstr.length();
552
553 for (; pad > 0; pad--)
554 print("0");
555 print(hexstr);
556 }
557 }
558 }
559 static private void literal(ALiteral l) {
560 //if (l.isWellFormedXML())
561 // System.out.print("xml");
562 line.append('"');
563 escape(l.toString());
564 line.append('"');
565 String lang = l.getLang();
566 if (lang != null && !lang.equals("")) {
567 line.append('@');
568 print(lang);
569 }
570 String dt = l.getDatatypeURI();
571 if (dt != null && !dt.equals("")) {
572 print("^^<");
573 escapeURI(dt);
574 line.append('>');
575 }
576
577 line.append(' ');
578 }
579
580 }