Source code: jena/schemagen.java
1 /*****************************************************************************
2 * Source code information
3 * -----------------------
4 * Original author Ian Dickinson, HP Labs Bristol
5 * Author email Ian.Dickinson@hp.com
6 * Package Jena 2
7 * Web http://sourceforge.net/projects/jena/
8 * Created 14-Apr-2003
9 * Filename $RCSfile: schemagen.java,v $
10 * Revision $Revision: 1.40 $
11 * Release status $State: Exp $
12 *
13 * Last modified on $Date: 2005/04/05 18:44:04 $
14 * by $Author: ian_dickinson $
15 *
16 * (c) Copyright 2002, 2003, 2004, 2005 Hewlett-Packard Development Company, LP
17 * (see footer for full conditions)
18 *****************************************************************************/
19
20 // Package
21 ///////////////
22 package jena;
23
24
25 // Imports
26 ///////////////
27 import java.util.*;
28 import java.io.*;
29 import java.net.MalformedURLException;
30 import java.net.URL;
31 import java.text.SimpleDateFormat;
32
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.oro.text.regex.*;
35 import org.apache.xerces.util.XMLChar;
36
37 import com.hp.hpl.jena.ontology.*;
38 import com.hp.hpl.jena.rdf.model.*;
39 import com.hp.hpl.jena.vocabulary.*;
40 import com.hp.hpl.jena.shared.*;
41
42
43
44 /**
45 * <p>
46 * A vocabulary generator, that will consume an ontology or other vocabulary file,
47 * and generate a Java file with the constants from the vocabulary compiled in.
48 * Designed to be highly flexible and customisable.
49 * </p>
50 *
51 * @author Ian Dickinson, HP Labs
52 * (<a href="mailto:Ian.Dickinson@hp.com" >email</a>)
53 * @version CVS $Id: schemagen.java,v 1.40 2005/04/05 18:44:04 ian_dickinson Exp $
54 */
55 public class schemagen {
56 // Constants
57 //////////////////////////////////
58
59 /** The namespace for the configuration model is {@value}; the configuration root is looked up as a resource typed <code>Config</code> in this namespace */
60 public static final String NS = "http://jena.hpl.hp.com/2003/04/schemagen#";
61
62 /** The default location of the configuration model is {@value}; used when no <code>-c</code> option is given, and a failure to read it is not treated as an error */
63 public static final String DEFAULT_CONFIG_URI = "file:schemagen.rdf";
64
65 /** The default marker string for denoting substitutions is {@value}; a substitution key is written with the marker on both sides, e.g. <code>%date%</code> */
66 public static final String DEFAULT_MARKER = "%";
67
68 /** Default template for writing out value declarations; refers to the substitution keys valclass, valname, valcreator and valuri */
69 public static final String DEFAULT_TEMPLATE = "public static final %valclass% %valname% = m_model.%valcreator%( \"%valuri%\" );";
70
71 /** Default template for writing out individual declarations; as the value template, plus the additional key valtype */
72 public static final String DEFAULT_INDIVIDUAL_TEMPLATE = "public static final %valclass% %valname% = m_model.%valcreator%( \"%valuri%\", %valtype% );";
73
74 /** Default template for the file header; refers to the global substitution keys nl, package, imports, sourceURI and date. NOTE(review): the CVS Id keyword is split across two literals, presumably so that CVS does not expand it in this file - confirm */
75 public static final String DEFAULT_HEADER_TEMPLATE = "/* CVS $" + "Id: $ */%nl%%package% %nl%%imports% %nl%/**%nl% * Vocabulary definitions from %sourceURI% %nl% * @author Auto-generated by schemagen on %date% %nl% */";
76
77 /** Default line length for comments before wrap */
78 public static final int COMMENT_LENGTH_LIMIT = 80;
79
80
81 /* Constants for the various options we can set */
82
83 /** Select an alternative config file; use <code>-c &lt;filename&gt;</code> on command line */
84 protected static final Object OPT_CONFIG_FILE = new Object();
85
86 /** Turn off all comment output; use <code>--nocomments</code> on command line; use <code>sgen:noComments</code> in config file */
87 protected static final Object OPT_NO_COMMENTS = new Object();
88
89 /** Nominate the URL of the input document; use <code>-i &lt;URL&gt;</code> on command line; use <code>sgen:input</code> in config file */
90 protected static final Object OPT_INPUT = new Object();
91
92 /** Specify that the language of the source is DAML+OIL; use <code>--daml</code> on command line; use <code>sgen:daml</code> in config file */
93 protected static final Object OPT_LANG_DAML = new Object();
94
95 /** Specify that the language of the source is OWL (the default); use <code>--owl</code> on command line; use <code>sgen:owl</code> in config file */
96 protected static final Object OPT_LANG_OWL = new Object();
97
98 /** Specify that the language of the source is RDFS; use <code>--rdfs</code> on command line; use <code>sgen:rdfs</code> in config file */
99 protected static final Object OPT_LANG_RDFS = new Object();
100
101 /** Specify the destination file or directory; use <code>-o &lt;fileName&gt;</code> on command line; use <code>sgen:output</code> in config file */
102 protected static final Object OPT_OUTPUT = new Object();
103
104 /** Specify the file header; use <code>--header "..."</code> on command line; use <code>sgen:header</code> in config file */
105 protected static final Object OPT_HEADER = new Object();
106
107 /** Specify the file footer; use <code>--footer "..."</code> on command line; use <code>sgen:footer</code> in config file */
108 protected static final Object OPT_FOOTER = new Object();
109
110 /** Specify the uri of the configuration root node; use <code>-r &lt;URL&gt;</code> on command line */
111 protected static final Object OPT_ROOT = new Object();
112
113 /** Specify the marker string for substitutions, default is '%'; use <code>--marker "..."</code> on command line; use <code>sgen:marker</code> in config file */
114 protected static final Object OPT_MARKER = new Object();
115
116 /** Specify the packagename; use <code>--package &lt;packagename&gt;</code> on command line; use <code>sgen:package</code> in config file */
117 protected static final Object OPT_PACKAGENAME = new Object();
118
119 /** Use ontology terms in preference to vanilla RDF; use <code>--ontology</code> on command line; use <code>sgen:ontology</code> in config file */
120 protected static final Object OPT_ONTOLOGY = new Object();
121
122 /** The name of the generated class; use <code>-n &lt;classname&gt;</code> on command line; use <code>sgen:classname</code> in config file */
123 protected static final Object OPT_CLASSNAME = new Object();
124
125 /** Additional decoration for class header (such as implements); use <code>--classdec &lt;...&gt;</code> on command line; use <code>sgen:classdec</code> in config file */
126 protected static final Object OPT_CLASSDEC = new Object();
127
128 /** The namespace URI for the vocabulary; use <code>-a &lt;uri&gt;</code> on command line; use <code>sgen:namespace</code> in config file */
129 protected static final Object OPT_NAMESPACE = new Object();
130
131 /** Additional declarations to add at the top of the class; use <code>--declarations &lt;...&gt;</code> on command line; use <code>sgen:declarations</code> in config file */
132 protected static final Object OPT_DECLARATIONS = new Object();
133
134 /** Section declaration for properties section; use <code>--propSection &lt;...&gt;</code> on command line; use <code>sgen:propSection</code> in config file */
135 protected static final Object OPT_PROPERTY_SECTION = new Object();
136
137 /** Section declaration for class section; use <code>--classSection &lt;...&gt;</code> on command line; use <code>sgen:classSection</code> in config file */
138 protected static final Object OPT_CLASS_SECTION = new Object();
139
140 /** Section declaration for individuals section; use <code>--individualsSection &lt;...&gt;</code> on command line; use <code>sgen:individualsSection</code> in config file */
141 protected static final Object OPT_INDIVIDUALS_SECTION = new Object();
142
143 /** Option to suppress properties in vocab file; use <code>--noproperties &lt;...&gt;</code> on command line; use <code>sgen:noproperties</code> in config file */
144 protected static final Object OPT_NOPROPERTIES = new Object();
145
146 /** Option to suppress classes in vocab file; use <code>--noclasses &lt;...&gt;</code> on command line; use <code>sgen:noclasses</code> in config file */
147 protected static final Object OPT_NOCLASSES = new Object();
148
149 /** Option to suppress individuals in vocab file; use <code>--noindividuals &lt;...&gt;</code> on command line; use <code>sgen:noindividuals</code> in config file */
150 protected static final Object OPT_NOINDIVIDUALS = new Object();
151
152 /** Option for no file header; use <code>--noheader &lt;...&gt;</code> on command line; use <code>sgen:noheader</code> in config file */
153 protected static final Object OPT_NOHEADER = new Object();
154
155 /** Template for writing out property declarations; use <code>--propTemplate &lt;...&gt;</code> on command line; use <code>sgen:propTemplate</code> in config file */
156 protected static final Object OPT_PROP_TEMPLATE = new Object();
157
158 /** Template for writing out class declarations; use <code>--classTemplate &lt;...&gt;</code> on command line; use <code>sgen:classTemplate</code> in config file */
159 protected static final Object OPT_CLASS_TEMPLATE = new Object();
160
161 /** Template for writing out individual declarations; use <code>--individualTemplate &lt;...&gt;</code> on command line; use <code>sgen:individualTemplate</code> in config file */
162 protected static final Object OPT_INDIVIDUAL_TEMPLATE = new Object();
163
164 /** Option for mapping constant names to uppercase; use <code>--uppercase &lt;...&gt;</code> on command line; use <code>sgen:uppercase</code> in config file */
165 protected static final Object OPT_UC_NAMES = new Object();
166
167 /** Option for including non-local URI's in vocabulary; use <code>--include &lt;uri&gt;</code> on command line; use <code>sgen:include</code> in config file */
168 protected static final Object OPT_INCLUDE = new Object();
169
170 /** Option for adding a suffix to the generated class name; use <code>--classnamesuffix &lt;suffix&gt;</code> on command line; use <code>sgen:classnamesuffix</code> in config file */
171 protected static final Object OPT_CLASSNAME_SUFFIX = new Object();
172
173 /** Option for the presentation syntax (encoding) of the file; use <code>-e <i>encoding</i></code> on command line; use <code>sgen:encoding</code> in config file */
174 protected static final Object OPT_ENCODING = new Object();
175
176 /** Option to show the usage message; use <code>--help</code> on command line */
177 protected static final Object OPT_HELP = new Object();
178
179 /** Option to generate an output file with DOS (\r\n) line endings. Default is Unix line endings; use <code>--dos</code> on command line; use <code>sgen:dos</code> in config file */
180 protected static final Object OPT_DOS = new Object();
181
182 /** Option to force the model to perform inference, off by default; use <code>--inference</code> on command line; use <code>sgen:inference</code> in config file */
183 protected static final Object OPT_USE_INF = new Object();
184
185
186
187 // Static variables
188 //////////////////////////////////
189
190
191 // Instance variables
192 //////////////////////////////////
193
194 /** The list of command line arguments, as Strings; set from the args array at the start of go() */
195 protected List m_cmdLineArgs;
196
197 /** The root of the options in the config file; set by determineConfigRoot() */
198 protected Resource m_root;
199
200 /** The model that contains the configuration information */
201 protected Model m_config = ModelFactory.createDefaultModel();
202
203 /** The model that contains the input source; created by determineLanguage() and loaded by selectInput() */
204 protected OntModel m_source;
205
206 /** The output stream we write to; System.out unless an output file or directory is given */
207 protected PrintStream m_output;
208
209 /** Option definitions: each row pairs an OPT_* key (matched by identity in getOpt()) with its definition. The first OptionDefinition argument is the command line form; the second is presumably the local name of the config file property, null where the option is command line only - confirm against OptionDefinition */
210 protected Object[][] m_optionDefinitions = new Object[][] {
211 {OPT_CONFIG_FILE, new OptionDefinition( "-c", null ) },
212 {OPT_ROOT, new OptionDefinition( "-r", null ) },
213 {OPT_NO_COMMENTS, new OptionDefinition( "--nocomments", "noComments" ) },
214 {OPT_INPUT, new OptionDefinition( "-i", "input" ) },
215 {OPT_LANG_DAML, new OptionDefinition( "--daml", "daml" ) },
216 {OPT_LANG_OWL, new OptionDefinition( "--owl", "owl" ) },
217 {OPT_LANG_RDFS, new OptionDefinition( "--rdfs", "rdfs" ) },
218 {OPT_OUTPUT, new OptionDefinition( "-o", "output" ) },
219 {OPT_HEADER, new OptionDefinition( "--header", "header" ) },
220 {OPT_FOOTER, new OptionDefinition( "--footer", "footer" ) },
221 {OPT_MARKER, new OptionDefinition( "--marker", "marker" ) },
222 {OPT_PACKAGENAME, new OptionDefinition( "--package", "package" ) },
223 {OPT_ONTOLOGY, new OptionDefinition( "--ontology", "ontology" ) },
224 {OPT_CLASSNAME, new OptionDefinition( "-n", "classname" ) },
225 {OPT_CLASSDEC, new OptionDefinition( "--classdec", "classdec" ) },
226 {OPT_NAMESPACE, new OptionDefinition( "-a", "namespace" ) },
227 {OPT_DECLARATIONS, new OptionDefinition( "--declarations", "declarations" ) },
228 {OPT_PROPERTY_SECTION, new OptionDefinition( "--propSection", "propSection" ) },
229 {OPT_CLASS_SECTION, new OptionDefinition( "--classSection", "classSection" ) },
230 {OPT_INDIVIDUALS_SECTION, new OptionDefinition( "--individualsSection", "individualsSection" ) },
231 {OPT_NOPROPERTIES, new OptionDefinition( "--noproperties", "noproperties" ) },
232 {OPT_NOCLASSES, new OptionDefinition( "--noclasses", "noclasses" ) },
233 {OPT_NOINDIVIDUALS, new OptionDefinition( "--noindividuals", "noindividuals" ) },
234 {OPT_PROP_TEMPLATE, new OptionDefinition( "--propTemplate", "propTemplate" ) },
235 {OPT_CLASS_TEMPLATE, new OptionDefinition( "--classTemplate", "classTemplate" ) },
236 {OPT_INDIVIDUAL_TEMPLATE, new OptionDefinition( "--individualTemplate", "individualTemplate" ) },
237 {OPT_UC_NAMES, new OptionDefinition( "--uppercase", "uppercase" ) },
238 {OPT_INCLUDE, new OptionDefinition( "--include", "include" ) },
239 {OPT_CLASSNAME_SUFFIX, new OptionDefinition( "--classnamesuffix", "classnamesuffix" )},
240 {OPT_NOHEADER, new OptionDefinition( "--noheader", "noheader" )},
241 {OPT_ENCODING, new OptionDefinition( "-e", "encoding" )},
242 {OPT_HELP, new OptionDefinition( "--help", null )},
243 {OPT_DOS, new OptionDefinition( "--dos", "dos" )},
244 {OPT_USE_INF, new OptionDefinition( "--inference", "inference" )},
245 };
246
247 /** Stack of replacements to apply; entries are pushed by addReplacementPattern() and removed from the end by pop() */
248 protected List m_replacements = new ArrayList();
249
250 /** Perl5 pattern compiler, used to compile the marker-delimited substitution keys */
251 protected Perl5Compiler m_perlCompiler = new Perl5Compiler();
252
253 /** Perl5 pattern matcher, used by substitute() */
254 protected PatternMatcher m_matcher = new Perl5Matcher();
255
256 /** Output file newline char - default is Unix, override with --dos */
257 protected String m_nl = "\n";
258
259 /** Size of indent step: the number of spaces written per indent level */
260 protected int m_indentStep = 4;
261
262 /** Set of names used so far */
263 protected Set m_usedNames = new HashSet();
264
265 /** Map from resources to java names */
266 protected Map m_resourcesToNames = new HashMap();
267
268 /** List of allowed namespace URI strings for admissible values; filled from the include option and from the namespace chosen by determineNamespaceURI() */
269 protected List m_includeURI = new ArrayList();
270
271
272 // Constructors
273 //////////////////////////////////
274
275 // External signature methods
276 //////////////////////////////////
277
278 /* Main entry point. See Javadoc for details of the many command line arguments */
279 public static void main( String[] args ) {
280 new schemagen().go( args );
281 }
282
283
284 // Internal implementation methods
285 //////////////////////////////////
286
287 /** Read the configuration parameters and do setup */
288 protected void go( String[] args ) {
289 // save the command line params
290 m_cmdLineArgs = Arrays.asList( args );
291
292 // check for user requesting help
293 if (m_cmdLineArgs.contains( getOpt( OPT_HELP ).m_cmdLineForm )) {
294 usage();
295 }
296
297 // check to see if there's a specified config file
298 String configURL = DEFAULT_CONFIG_URI;
299 if (hasValue( OPT_CONFIG_FILE )) {
300 // check for protocol; add file: if not specified
301 configURL = urlCheck( getValue( OPT_CONFIG_FILE ) );
302 }
303
304 // try to read the config URI
305 try {
306 m_config.read( configURL );
307 }
308 catch (Exception e) {
309 // if the user left the default config uri in place, it's not an error to fail to read it
310 if (!configURL.equals( DEFAULT_CONFIG_URI )) {
311 abort( "Failed to read configuration from URI " + configURL, e );
312 }
313 }
314
315 // got the configuration, now we can begin processing
316 processInput();
317 }
318
319 /**
 * The sequence of steps to process an entire file: resolve the configuration,
 * open the input and output, then write each part of the generated class in turn.
 * The order is significant, since later steps use state set up by earlier ones;
 * for example the source model created by determineLanguage() is the one that
 * selectInput() reads into, and the output stream opened by selectOutput() is
 * the one that every write step prints to.
 */
320 protected void processInput() {
321 determineConfigRoot();      // locate the config root resource and collect the include URIs
322 determineLanguage();        // create the source model for the chosen ontology language
323 selectInput();              // load the input document into the source model
324 selectOutput();             // open the output stream and choose the newline string
325 setGlobalReplacements();    // register the substitutions that are always available
326
327 processHeader();
328 writeClassDeclaration();
329 writeInitialDeclarations();
330 writeProperties();
331 writeClasses();
332 writeIndividuals();
333 writeClassClose();
334 processFooter();
335 closeOutput();              // flush and close the output stream
336 }
337
338 /** Determine the root resource in the configuration file */
339 protected void determineConfigRoot() {
340 if (hasValue( OPT_ROOT )) {
341 String rootURI = getValue( OPT_ROOT );
342 m_root = m_config.getResource( rootURI );
343 }
344 else {
345 // no specified root, we assume there is only one with type sgen:Config
346 StmtIterator i = m_config.listStatements( null, RDF.type, m_config.getResource( NS + "Config" ) );
347 if (i.hasNext()) {
348 m_root = i.nextStatement().getSubject();
349 }
350 else {
351 // no configuration root, so we invent one
352 m_root = m_config.createResource();
353 }
354 }
355
356 // add any extra uri's that are allowed in the filter
357 m_includeURI.addAll( getAllValues( OPT_INCLUDE ) );
358 }
359
360 /** Create the source model after determining which input language */
361 protected void determineLanguage() {
362 OntModelSpec s = null;
363 if (isTrue( OPT_LANG_DAML )) {
364 // daml language specified
365 if (isTrue( OPT_USE_INF )) {
366 s = OntModelSpec.DAML_MEM_RULE_INF;
367 }
368 else {
369 s = OntModelSpec.DAML_MEM;
370 }
371 }
372 else if (isTrue( OPT_LANG_RDFS )) {
373 // rdfs language specified
374 if (isTrue( OPT_USE_INF )) {
375 s = OntModelSpec.RDFS_MEM_RDFS_INF;
376 }
377 else {
378 s = OntModelSpec.RDFS_MEM;
379 }
380 }
381 else {
382 // owl is the default
383 // s = OntModelSpec.getDefaultSpec( ProfileRegistry.OWL_LANG );
384 if (isTrue( OPT_USE_INF )) {
385 s = OntModelSpec.OWL_MEM_RULE_INF;
386 }
387 else {
388 s = OntModelSpec.OWL_MEM;
389 }
390 }
391
392 m_source = ModelFactory.createOntologyModel( s, null );
393 m_source.getDocumentManager().setProcessImports( false );
394 }
395
396 /** Identify the URL that is to be read in and translated to a vocab file, and load the source into the source model */
397 protected void selectInput() {
398 if (!hasResourceValue( OPT_INPUT )) {
399 usage();
400 }
401
402 String input = urlCheck( getValue( OPT_INPUT ) );
403 String syntax = getValue( OPT_ENCODING );
404
405 try {
406 if (syntax == null) {
407 m_source.read( input );
408 }
409 else {
410 m_source.read( input, syntax );
411 }
412 }
413 catch (JenaException e) {
414 abort( "Failed to read input source " + input, e );
415 }
416 }
417
418 /** Identify the file we are to write the output to */
419 protected void selectOutput() {
420 String outFile = getValue( OPT_OUTPUT );
421
422 if (outFile == null) {
423 m_output = System.out;
424 }
425 else {
426 try {
427 File out = new File( outFile );
428
429 if (out.isDirectory()) {
430 // create a file in this directory named classname.java
431 String fileName = outFile + System.getProperty( "file.separator" ) + getClassName() + ".java";
432 out = new File( fileName );
433 }
434
435 m_output = new PrintStream( new FileOutputStream( out ) );
436 }
437 catch (Exception e) {
438 abort( "I/O error while trying to open file for writing: " + outFile, null );
439 }
440 }
441
442 // check for DOS line endings
443 if (isTrue( OPT_DOS )) {
444 m_nl = "\r\n";
445 }
446 }
447
448 /** Process the header at the start of the file, if defined */
449 protected void processHeader() {
450 String header = hasValue( OPT_HEADER ) ? getValue( OPT_HEADER ) : DEFAULT_HEADER_TEMPLATE;
451
452 // user can turn of header processing, default is to have it on
453 if (!hasValue( OPT_NOHEADER )) {
454 writeln( 0, substitute( header ) );
455 }
456 else {
457 // we have to do the imports at least
458 writeln( 0, "import com.hp.hpl.jena.rdf.model.*;" );
459 if (isTrue( OPT_ONTOLOGY )) {
460 writeln( 0, "import com.hp.hpl.jena.ontology.*;" );
461 }
462 }
463 }
464
465 /** Process the footer at the end of the file, if defined */
466 protected void processFooter() {
467 String footer = getValue( OPT_FOOTER );
468
469 if (footer != null) {
470 writeln( 0, substitute( footer ) );
471 }
472 }
473
474 /** The list of replacements that are always available */
475 protected void setGlobalReplacements() {
476 addReplacementPattern( "date", new SimpleDateFormat( "dd MMM yyyy HH:mm").format( new Date() ) );
477 addReplacementPattern( "package", hasValue( OPT_PACKAGENAME ) ? ("package " + getValue( OPT_PACKAGENAME ) + ";") : "" );
478 addReplacementPattern( "imports", getImports() );
479 addReplacementPattern( "classname", getClassName() );
480 addReplacementPattern( "sourceURI", getResource( OPT_INPUT ).getURI() );
481 addReplacementPattern( "nl", m_nl );
482 }
483
484 /** Add a pattern-value pair to the list of available patterns */
485 protected void addReplacementPattern( String key, String replacement ) {
486 if (replacement != null && key != null) {
487 String marker = getValue( OPT_MARKER );
488 marker = (marker == null) ? DEFAULT_MARKER : marker;
489
490 try {
491 m_replacements.add( new Replacement( m_perlCompiler.compile( marker + key + marker ),
492 new StringSubstitution( replacement ) ) );
493 }
494 catch (MalformedPatternException e) {
495 abort( "Malformed regexp pattern " + marker + key + marker, e );
496 }
497 }
498 }
499
500 /** Pop n replacements off the stack */
501 protected void pop( int n ) {
502 for (int i = 0; i < n; i++) {
503 m_replacements.remove( m_replacements.size() - 1 );
504 }
505 }
506
507
508 /** Close the output file */
509 protected void closeOutput() {
510 m_output.flush();
511 m_output.close();
512 }
513
514
515 /** Answer true if the given option is set to true */
516 protected boolean isTrue( Object option ) {
517 return getOpt( option ).isTrue();
518 }
519
520 /** Answer true if the given option has value */
521 protected boolean hasValue( Object option ) {
522 return getOpt( option ).hasValue();
523 }
524
525 /** Answer true if the given option has a resource value */
526 protected boolean hasResourceValue( Object option ) {
527 return getOpt( option ).hasResourceValue();
528 }
529
530 /** Answer the value of the option or null */
531 protected String getValue( Object option ) {
532 return getOpt( option ).getValue();
533 }
534
535 /** Answer all values for the given options as Strings */
536 protected List getAllValues( Object option ) {
537 List values = new ArrayList();
538 OptionDefinition opt = getOpt( option );
539
540 // look in the command line arguments
541 for (Iterator i = m_cmdLineArgs.iterator(); i.hasNext(); ) {
542 String s = (String) i.next();
543 if (s.equals( opt.m_cmdLineForm )) {
544 // next iter value is the arg value
545 values.add( i.next() );
546 }
547 }
548
549 // now look in the config file
550 for (StmtIterator i = m_root.listProperties( opt.m_prop ); i.hasNext(); ) {
551 Statement s = i.nextStatement();
552
553 if (s.getObject() instanceof Literal) {
554 values.add( s.getString() );
555 }
556 else {
557 values.add( s.getResource().getURI() );
558 }
559 }
560
561 return values;
562 }
563
564 /** Answer the value of the option or null */
565 protected Resource getResource( Object option ) {
566 return getOpt( option ).getResource();
567 }
568
569 /** Answer the option object for the given option */
570 protected OptionDefinition getOpt( Object option ) {
571 for (int i = 0; i < m_optionDefinitions.length; i++) {
572 if (m_optionDefinitions[i][0] == option) {
573 return (OptionDefinition) m_optionDefinitions[i][1];
574 }
575 }
576
577 return null;
578 }
579
580 /** Abort due to exception */
581 protected void abort( String msg, Exception e ) {
582 System.err.println( msg );
583 if (e != null) {
584 System.err.println( e );
585 }
586 System.exit( 1 );
587 }
588
589 /** Print usage message and abort */
590 protected void usage() {
591 System.err.println( "Usage:" );
592 System.err.println( " java jena.schemagen [options ...]" );
593 System.err.println();
594 System.err.println( "Commonly used options include:" );
595 System.err.println( " -i <input> the source document as a file or URL." );
596 System.err.println( " -n <name> the name of the created Java class." );
597 System.err.println( " -a <uri> the namespace URI of the source document." );
598 System.err.println( " -o <file> the file to write the generated class into." );
599 System.err.println( " -o <dir> the directory in which the generated Java class is created." );
600 System.err.println( " By default, output goes to stdout." );
601 System.err.println( " -e <encoding> the encoding of the input document (N3, RDF/XML, etc)." );
602 System.err.println( " -c <config> a filename or URL for an RDF document containing " );
603 System.err.println( " configuration parameters." );
604 System.err.println();
605 System.err.println( "Many other options are available. See the schemagen HOWTO in the " );
606 System.err.println( "Jena documentation for full details." );
607 System.exit( 1 );
608 }
609
610 /** Use the current replacements list to do the subs in the given string */
611 protected String substitute( String sIn ) {
612 String s = sIn;
613
614 for (Iterator i = m_replacements.iterator(); i.hasNext(); ) {
615 Replacement r = (Replacement) i.next();
616
617 s = Util.substitute( m_matcher, r.pattern, r.sub, s, Util.SUBSTITUTE_ALL );
618 }
619
620 return s;
621 }
622
623 /** Add the appropriate indent to a buffer */
624 protected int indentTo( int i, StringBuffer buf ) {
625 int indent = i * m_indentStep;
626 for (int j = 0; j < indent; j++) {
627 buf.append( ' ' );
628 }
629
630 return indent;
631 }
632
633 /** Write a blank line, with indent and newline */
634 protected void writeln( int indent ) {
635 writeln( indent, "" );
636 }
637
638 /** Write out the given string with n spaces of indent, with newline */
639 protected void writeln( int indent, String s ) {
640 write( indent, s );
641 m_output.print( m_nl );
642 }
643
644 /** Write out the given string with n spaces of indent */
645 protected void write( int indentLevel, String s ) {
646 for (int i = 0; i < (m_indentStep * indentLevel); i++) {
647 m_output.print( " " );
648 }
649
650 m_output.print( s );
651 }
652
653 /** Determine the list of imports to include in the file */
654 protected String getImports() {
655 StringBuffer buf = new StringBuffer();
656 buf.append( "import com.hp.hpl.jena.rdf.model.*;" );
657 buf.append( m_nl );
658
659 if (useOntology()) {
660 buf.append( "import com.hp.hpl.jena.ontology.*;" );
661 }
662
663 return buf.toString();
664 }
665
666 /** Determine the class name of the vocabulary from the uri */
667 protected String getClassName() {
668 // if a class name is given, just use that
669 if (hasValue( OPT_CLASSNAME )) {
670 return getValue(( OPT_CLASSNAME ));
671 }
672
673 // otherwise, we generate a name based on the URI
674 String uri = getValue( OPT_INPUT );
675
676 // remove any suffixes
677 uri = (uri.endsWith( "#" )) ? uri.substring( 0, uri.length() - 1 ) : uri;
678 uri = (uri.endsWith( ".daml" )) ? uri.substring( 0, uri.length() - 5 ) : uri;
679 uri = (uri.endsWith( ".owl" )) ? uri.substring( 0, uri.length() - 4 ) : uri;
680 uri = (uri.endsWith( ".rdf" )) ? uri.substring( 0, uri.length() - 4 ) : uri;
681 uri = (uri.endsWith( ".rdfs" )) ? uri.substring( 0, uri.length() - 5 ) : uri;
682 uri = (uri.endsWith( ".n3" )) ? uri.substring( 0, uri.length() - 3 ) : uri;
683 uri = (uri.endsWith( ".xml" )) ? uri.substring( 0, uri.length() - 4 ) : uri;
684
685 // now work back to the first non name character from the end
686 int i = uri.length() - 1;
687 for (; i > 0; i--) {
688 if (!Character.isUnicodeIdentifierPart( uri.charAt( i ) ) &&
689 uri.charAt( i ) != '-') {
690 i++;
691 break;
692 }
693 }
694
695 String name = uri.substring( i );
696
697 // optionally add name suffix
698 if (hasValue( OPT_CLASSNAME_SUFFIX )) {
699 name = name + getValue( OPT_CLASSNAME_SUFFIX );
700 }
701
702 // now we make the name into a legal Java identifier
703 return asLegalJavaID( name, true );
704 }
705
706 /** Answer true if we are using ontology terms in this vocabulary */
707 protected boolean useOntology() {
708 return isTrue( OPT_ONTOLOGY );
709 }
710
711 /** Answer true if all comments are suppressed */
712 protected boolean noComments() {
713 return isTrue( OPT_NO_COMMENTS );
714 }
715
716 /** Convert s to a legal Java identifier; capitalise first char if cap is true */
717 protected String asLegalJavaID( String s, boolean cap ) {
718 StringBuffer buf = new StringBuffer();
719 int i = 0;
720
721 // treat the first character specially - must be able to start a Java ID, may have to upcase
722 try {
723 for (; !Character.isJavaIdentifierStart( s.charAt( i )); i++) {}
724 }
725 catch (StringIndexOutOfBoundsException e) {
726 System.err.println( "Could not identify legal Java identifier start character in '" + s + "', replacing with __" );
727 return "__";
728 }
729 buf.append( cap ? Character.toUpperCase( s.charAt( i ) ) : s.charAt( i ) );
730
731 // copy the remaining characters - replace non-legal chars with '_'
732 for (++i; i < s.length(); i++) {
733 char c = s.charAt( i );
734 buf.append( Character.isJavaIdentifierPart( c ) ? c : '_' );
735 }
736
737 return buf.toString();
738 }
739
740 /** The opening class declaration */
741 protected void writeClassDeclaration() {
742 write( 0, "public class " );
743 write( 0, getClassName() );
744 write( 0, " " );
745
746 if (hasValue( OPT_CLASSDEC )) {
747 write( 0, getValue( OPT_CLASSDEC ) );
748 }
749
750 writeln( 0, "{" );
751 }
752
753 /** The close of the class decoration */
754 protected void writeClassClose() {
755 writeln( 0, "}" );
756 }
757
758 /** Write the declarations at the head of the class */
759 protected void writeInitialDeclarations() {
760 writeModelDeclaration();
761 writeNamespace();
762
763 if (hasValue( OPT_DECLARATIONS )) {
764 writeln( 0, getValue( OPT_DECLARATIONS ));
765 }
766 }
767
768 /** Write the declaration of the model */
769 protected void writeModelDeclaration() {
770 if (useOntology()) {
771 String lang = "OWL";
772 if (isTrue( OPT_LANG_DAML )) {
773 lang = "DAML";
774 }
775 else if (isTrue( OPT_LANG_RDFS )) {
776 lang = "RDFS";
777 }
778 writeln( 1, "/** <p>The ontology model that holds the vocabulary terms</p> */" );
779 writeln( 1, "private static OntModel m_model = ModelFactory.createOntologyModel( OntModelSpec." + lang + "_MEM, null );" );
780 }
781 else {
782 writeln( 1, "/** <p>The RDF model that holds the vocabulary terms</p> */" );
783 writeln( 1, "private static Model m_model = ModelFactory.createDefaultModel();" );
784 }
785
786 writeln( 1 );
787 }
788
789 /** Write the string and resource that represent the namespace */
790 protected void writeNamespace() {
791 String nsURI = determineNamespaceURI();
792
793 writeln( 1, "/** <p>The namespace of the vocabulary as a string</p> */" );
794 writeln( 1, "public static final String NS = \"" + nsURI + "\";" );
795 writeln( 1 );
796
797 writeln( 1, "/** <p>The namespace of the vocabulary as a string</p>" );
798 writeln( 1, " * @see #NS */" );
799 writeln( 1, "public static String getURI() {return NS;}" );
800 writeln( 1 );
801
802 writeln( 1, "/** <p>The namespace of the vocabulary as a resource</p> */" );
803 writeln( 1, "public static final Resource NAMESPACE = m_model.createResource( NS );" );
804 writeln( 1 );
805 }
806
807
808 /** Determine what the namespace URI for this vocabulary is */
809 protected String determineNamespaceURI() {
810 // easy: it was set by the user
811 if (hasResourceValue( OPT_NAMESPACE )) {
812 String ns = getResource( OPT_NAMESPACE ).getURI();
813
814 // save the namespace URI as the main included uri for the filter
815 m_includeURI.add( ns );
816
817 return ns;
818 }
819
820 // alternatively, the default namespace may be set in the prefix mapping read from the input document
821 String defaultNS = m_source.getNsPrefixURI( "" );
822 if (defaultNS == null) {
823 defaultNS = m_source.getBaseModel().getNsPrefixURI( "" );
824 }
825
826 if (defaultNS != null) {
827 m_includeURI.add( defaultNS );
828 return defaultNS;
829 }
830
831 // if we are using an ontology model, we can get the namespace URI from the ontology element
832 try {
833 Resource ont = m_source.getBaseModel()
834 .listStatements( null, RDF.type, m_source.getProfile().ONTOLOGY() )
835 .nextStatement()
836 .getSubject();
837
838 String uri = ont.getURI();
839
840 // ensure ends with namespace sep char
841 char ch = uri.charAt( uri.length() - 1 );
842 boolean endsWithNCNameCh = XMLChar.isNCName( ch );
843 uri = endsWithNCNameCh ? uri + "#" : uri;
844
845 // save the namespace URI as the main included uri for the filter
846 m_includeURI.add( uri );
847
848 return uri;
849 }
850 catch (Exception e) {
851 abort( "Could not determine the base URI for the input vocabulary", null );
852 return null;
853 }
854 }
855
856
857 /** Write the list of properties */
858 protected void writeProperties() {
859 if (isTrue( OPT_NOPROPERTIES )) {
860 return;
861 }
862
863 if (hasValue( OPT_PROPERTY_SECTION )) {
864 writeln( 0, getValue( OPT_PROPERTY_SECTION ));
865 }
866
867 if (useOntology()) {
868 writeObjectProperties();
869 writeDatatypeProperties();
870 writeAnnotationProperties();
871
872 // we also write out the RDF properties, to mop up any props that are not stated as
873 // object, datatype or annotation properties
874 writeRDFProperties();
875 }
876 else {
877 writeRDFProperties();
878 }
879 }
880
881 /** Write any object properties in the vocabulary */
882 protected void writeObjectProperties() {
883 String template = hasValue( OPT_PROP_TEMPLATE ) ? getValue( OPT_PROP_TEMPLATE ) : DEFAULT_TEMPLATE;
884
885 if (!isTrue( OPT_LANG_RDFS )) {
886 for (Iterator i = m_source.listObjectProperties(); i.hasNext(); ) {
887 writeValue( (Resource) i.next(), template, "ObjectProperty", "createObjectProperty", "_PROP" );
888 }
889 }
890 }
891
892 /** Write any datatype properties in the vocabulary */
893 protected void writeDatatypeProperties() {
894 String template = hasValue( OPT_PROP_TEMPLATE ) ? getValue( OPT_PROP_TEMPLATE ) : DEFAULT_TEMPLATE;
895
896 if (!isTrue( OPT_LANG_RDFS )) {
897 for (Iterator i = m_source.listDatatypeProperties(); i.hasNext(); ) {
898 writeValue( (Resource) i.next(), template, "DatatypeProperty", "createDatatypeProperty", "_PROP" );
899 }
900 }
901 }
902
903 /** Write any annotation properties in the vocabulary */
904 protected void writeAnnotationProperties() {
905 String template = hasValue( OPT_PROP_TEMPLATE ) ? getValue( OPT_PROP_TEMPLATE ) : DEFAULT_TEMPLATE;
906
907 if (!isTrue( OPT_LANG_RDFS )) {
908 for (Iterator i = m_source.listAnnotationProperties(); i.hasNext(); ) {
909 writeValue( (Resource) i.next(), template, "AnnotationProperty", "createAnnotationProperty", "_PROP" );
910 }
911 }
912 }
913
914 /** Write any vanilla RDF properties in the vocabulary */
915 protected void writeRDFProperties() {
916 String template = hasValue( OPT_PROP_TEMPLATE ) ? getValue( OPT_PROP_TEMPLATE ) : DEFAULT_TEMPLATE;
917
918 // select the appropriate properties based on the language choice
919 Resource[] props;
920 if (isTrue( OPT_LANG_OWL )) {
921 props = new Resource[] {OWL.ObjectProperty, OWL.DatatypeProperty, RDF.Property};
922 }
923 else if (isTrue( OPT_LANG_DAML )) {
924 props = new Resource[] {DAML_OIL.ObjectProperty, DAML_OIL.DatatypeProperty, RDF.Property};
925 }
926 else {
927 props = new Resource[] {RDF.Property};
928 }
929
930 // now write the properties
931 for (int j = 0; j < props.length; j++) {
932 for (StmtIterator i = m_source.listStatements( null, RDF.type, props[j] ); i.hasNext(); ) {
933 writeValue( i.nextStatement().getSubject(), template, "Property", "createProperty", "_PROP" );
934 }
935 }
936 }
937
938 /** Write any classes in the vocabulary */
939 protected void writeClasses() {
940 if (isTrue( OPT_NOCLASSES )) {
941 return;
942 }
943
944 if (hasValue( OPT_CLASS_SECTION )) {
945 writeln( 0, getValue( OPT_CLASS_SECTION ));
946 }
947
948 if (useOntology()) {
949 writeOntClasses();
950 }
951 else {
952 writeRDFClasses();
953 }
954 }
955
956 /** Write classes as ontology terms */
957 protected void writeOntClasses() {
958 String template = hasValue( OPT_CLASS_TEMPLATE ) ? getValue( OPT_CLASS_TEMPLATE ) : DEFAULT_TEMPLATE;
959
960 for (Iterator i = m_source.listClasses(); i.hasNext(); ) {
961 writeValue( (Resource) i.next(), template, "OntClass", "createClass", "_CLASS" );
962 }
963 }
964
965 /** Write classes as vanilla RDF terms */
966 protected void writeRDFClasses() {
967 String template = hasValue( OPT_CLASS_TEMPLATE ) ? getValue( OPT_CLASS_TEMPLATE ) : DEFAULT_TEMPLATE;
968
969 // make sure we're looking for the appropriate type of class
970 Resource cls = OWL.Class;
971 if (isTrue( OPT_LANG_DAML )) {
972 cls = DAML_OIL.Class;
973 }
974 else if (isTrue( OPT_LANG_RDFS )) {
975 cls = RDFS.Class;
976 }
977
978 for (StmtIterator i = m_source.listStatements( null, RDF.type, cls ); i.hasNext(); ) {
979 writeValue( i.nextStatement().getSubject(), template, "Resource", "createResource", "_CLASS" );
980 }
981 }
982
983 /** Write any instances (individuals) in the vocabulary */
984 protected void writeIndividuals() {
985 if (isTrue( OPT_NOINDIVIDUALS )) {
986 return;
987 }
988
989 if (hasValue( OPT_INDIVIDUALS_SECTION )) {
990 writeln( 0, getValue( OPT_INDIVIDUALS_SECTION ));
991 }
992
993 if (useOntology()) {
994 writeOntIndividuals();
995 }
996 else {
997 writeRDFIndividuals();
998 }
999 }
1000
1001 /** Write individuals as ontology terms */
1002 protected void writeOntIndividuals() {
1003 String template = hasValue( OPT_INDIVIDUAL_TEMPLATE ) ? getValue( OPT_INDIVIDUAL_TEMPLATE ) : DEFAULT_INDIVIDUAL_TEMPLATE;
1004
1005 for (StmtIterator i = m_source.listStatements( null, RDF.type, (RDFNode) null ); i.hasNext(); ) {
1006 Statement candidate = i.nextStatement();
1007
1008 if (candidate.getObject() instanceof Resource) {
1009 Resource candObj = (Resource)candidate.getObject();
1010
1011 if (!candObj.isAnon()) {
1012 String uri = candObj.getURI();
1013
1014 for (Iterator j = m_includeURI.iterator(); j.hasNext(); ) {
1015 if (uri.startsWith( (String) j.next() )) {
1016 // the subject has an included type
1017 Resource ind = candidate.getSubject();
1018
1019 // do we have a local class resource
1020 String varName = (String) m_resourcesToNames.get( candidate.getObject() );
1021 String valType = (varName != null) ? varName : "m_model.createClass( \"" + uri + "\" )";
1022
1023 // push the individuals type onto the stack
1024 addReplacementPattern( "valtype", valType );
1025 writeValue( ind, template, "Individual", "createIndividual", "_INSTANCE" );
1026 pop( 1 );
1027
1028 break;
1029 }
1030 }
1031 }
1032 }
1033 }
1034 }
1035
1036 /** Write individuals as vanilla RDF terms */
1037 protected void writeRDFIndividuals() {
1038 String template = hasValue( OPT_INDIVIDUAL_TEMPLATE ) ? getValue( OPT_INDIVIDUAL_TEMPLATE ) : DEFAULT_TEMPLATE;
1039
1040 for (StmtIterator i = m_source.listStatements( null, RDF.type, (RDFNode) null ); i.hasNext(); ) {
1041 Statement candidate = i.nextStatement();
1042
1043 if (candidate.getObject() instanceof Resource) {
1044 Resource candObj = candidate.getResource();
1045
1046 if (!candObj.isAnon()) {
1047 String uri = candObj.getURI();
1048
1049 for (Iterator j = m_includeURI.iterator(); j.hasNext(); ) {
1050 if (uri.startsWith( (String) j.next() )) {
1051 // the subject of the sentence has a type that's on our include list
1052 writeValue( candidate.getSubject(), template, "Resource", "createResource", "_INSTANCE" );
1053
1054 break;
1055 }
1056 }
1057 }
1058 }
1059 }
1060 }
1061
1062 /** Write the value declaration out using the given template, optionally creating comments */
1063 protected void writeValue( Resource r, String template, String valueClass, String creator, String disambiguator ) {
1064 if (!filter( r )) {
1065 if (!noComments() && hasComment( r )) {
1066 writeln( 1, formatComment( getComment( r ) ) );
1067 }
1068
1069 // push the local bindings for the substitution onto the stack
1070 addReplacementPattern( "valuri", r.getURI() );
1071 addReplacementPattern( "valname", getValueName( r, disambiguator ));
1072 addReplacementPattern( "valclass", valueClass );
1073 addReplacementPattern( "valcreator", creator );
1074
1075 // write out the value
1076 writeln( 1, substitute( template ) );
1077 writeln( 1 );
1078
1079 // pop the local replacements off the stack
1080 pop( 4 );
1081 }
1082 }
1083
1084 /** Answer true if the given resource has an rdf:comment or daml:comment */
1085 protected boolean hasComment( Resource r ) {
1086 return r.hasProperty( RDFS.comment ) || r.hasProperty( DAML_OIL.comment );
1087 }
1088
1089 /** Answer all of the commentage on the given resource, as a string */
1090 protected String getComment( Resource r ) {
1091 StringBuffer comment = new StringBuffer();
1092
1093 // collect any RDFS or DAML comments attached to the node
1094 for (NodeIterator ni = m_source.listObjectsOfProperty( r, RDFS.comment ); ni.hasNext(); ) {
1095 //comment.append( ((Literal) ni.nextNode()).getLexicalForm().trim() );
1096 RDFNode n = ni.nextNode();
1097 if (n instanceof Literal) {
1098 comment.append( ((Literal) n).getLexicalForm().trim() );
1099 }
1100 else {
1101 LogFactory.getLog( getClass() ).debug( "Not a literal: " + n );
1102 }
1103 }
1104
1105 for (NodeIterator ni = m_source.listObjectsOfProperty( r, DAML_OIL.comment ); ni.hasNext(); ) {
1106 comment.append( ((Literal) ni.nextNode()).getLexicalForm().trim() );
1107 }
1108
1109 return comment.toString();
1110 }
1111
1112 /** Format the comment as Javadoc, and limit the line width */
1113 protected String formatComment( String comment ) {
1114 StringBuffer buf = new StringBuffer();
1115 buf.append( "/** <p>" );
1116
1117 boolean inSpace = false;
1118 int pos = buf.length();
1119 boolean singleLine = true;
1120
1121 // now format the comment by compacting whitespace and limiting the line length
1122 // add the prefix to the start of each line
1123 for (int i = 0; i < comment.length(); i++ ) {
1124 char c = comment.charAt( i );
1125
1126 // compress whitespace
1127 if (Character.isWhitespace( c )) {
1128 if (inSpace) {
1129 continue; // more than one space is ignored
1130 }
1131 else {
1132 c = ' '; // map all whitespace to 0x20
1133 inSpace = true;
1134 }
1135 }
1136 else {
1137 inSpace = false;
1138 }
1139
1140 // escapes?
1141 if (c == '\\') {
1142 c = comment.charAt( ++i );
1143
1144 switch (c) {
1145 case 'n':
1146 buf.append( m_nl );
1147 pos = indentTo( 1, buf );
1148 buf.append( " * " );
1149 pos += 3;
1150 singleLine = false;
1151 break;
1152
1153 default:
1154 // add other escape sequences above
1155 break;
1156 }
1157 }
1158 else if (c == '<') {
1159 buf.append(