1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.openjpa.lib.meta;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStreamReader;
24 import java.io.Reader;
25 import java.net.URL;
26 import java.security.AccessController;
27 import java.util.ArrayList;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.LinkedList;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.Set;
36 import javax.xml.parsers.SAXParser;
37
38 import org.xml.sax.Attributes;
39 import org.xml.sax.InputSource;
40 import org.xml.sax.Locator;
41 import org.xml.sax.SAXException;
42 import org.xml.sax.SAXParseException;
43 import org.xml.sax.ext.LexicalHandler;
44 import org.xml.sax.helpers.DefaultHandler;
45 import org.apache.openjpa.lib.log.Log;
46 import org.apache.openjpa.lib.util.J2DoPrivHelper;
47 import org.apache.openjpa.lib.util.JavaVersions;
48 import org.apache.openjpa.lib.util.Localizer.Message;
49 import org.apache.openjpa.lib.util.Localizer;
50 import org.apache.openjpa.lib.xml.Commentable;
51 import org.apache.openjpa.lib.xml.DocTypeReader;
52 import org.apache.openjpa.lib.xml.Location;
53 import org.apache.openjpa.lib.xml.XMLFactory;
54
55 /**
56 * Custom SAX parser used by the system to quickly parse metadata files.
57 * Subclasses should handle the processing of the content.
58 *
59 * @author Abe White
60 * @nojavadoc
61 */
62 public abstract class XMLMetaDataParser extends DefaultHandler
63 implements LexicalHandler, MetaDataParser {
64
// localized messages for this package
private static final Localizer _loc = Localizer.forPackage
    (XMLMetaDataParser.class);

// true when the Xerces build on the classpath is the one for which schema
// validation has to be switched off
private static boolean _schemaBug;

static {
    boolean buggy;
    try {
        // check for Xerces version 2.0.2 to see if we need to disable
        // schema validation, which works around the bug reported at:
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708859
        Class versionClass = Class.forName("org.apache.xerces.impl.Version");
        Object version = versionClass.getField("fVersion").get(null);
        buggy = "Xerces-J 2.0.2".equals(version);
    } catch (Throwable t) {
        // Xerces might not be available
        buggy = false;
    }
    _schemaBug = buggy;
}
82
// map of classloaders to sets of parsed locations, so that we don't parse
// the same resource multiple times for the same class loader; created
// lazily by parsed()
private Map _parsed = null;

// configuration; each of these has a public accessor pair below
private Log _log = null;
private boolean _validating = true;
// whether the source name is handed to the SAX parser as the system id
private boolean _systemId = true;
private boolean _caching = true;
private boolean _parseText = true;
private boolean _parseComments = true;
private String _suffix = null;
// explicitly configured class loader, or null to use the context loader
private ClassLoader _loader = null;
// context class loader looked up on demand; cleared again by reset()
private ClassLoader _curLoader = null;

// state for current parse
// results added while the current document is being parsed
private final Collection _curResults = new LinkedList();
// copy of the results of the last parse that finished without error
private List _results = null;
private String _sourceName = null;
private File _sourceFile = null;
// text collected for the element currently being read
private StringBuffer _text = null;
// comments collected since the last end-element event
private List _comments = null;
private Location _location = new Location();
// optional delegate that receives every lexical event this parser sees
private LexicalHandler _lh = null;
// nesting depth of the current element; -1 outside the root element
private int _depth = -1;
// depth threshold for ignoring content: start events are dispatched only
// while _depth <= _ignore; Integer.MAX_VALUE means nothing is ignored
private int _ignore = Integer.MAX_VALUE;
108
109 /**
110 * Whether to parse element text.
111 */
112 public boolean getParseText() {
113 return _parseText;
114 }
115
116 /**
117 * Whether to parse element text.
118 */
119 public void setParseText(boolean text) {
120 _parseText = text;
121 }
122
123 /**
124 * Whether to parse element comments.
125 */
126 public boolean getParseComments() {
127 return _parseComments;
128 }
129
130 /**
131 * Whether to parse element comments.
132 */
133 public void setParseComments(boolean comments) {
134 _parseComments = comments;
135 }
136
137 /**
138 * The XML document location.
139 */
140 public Location getLocation() {
141 return _location;
142 }
143
144 /**
145 * The lexical handler that should be registered with the SAX parser used
146 * by this class. Since the <code>org.xml.sax.ext</code> package is not
147 * a required part of SAX2, this handler might not be used by the parser.
148 */
149 public LexicalHandler getLexicalHandler() {
150 return _lh;
151 }
152
153 /**
154 * The lexical handler that should be registered with the SAX parser used
155 * by this class. Since the <code>org.xml.sax.ext</code> package is not
156 * a required part of SAX2, this handler might not be used by the parser.
157 */
158 public void setLexicalHandler(LexicalHandler lh) {
159 _lh = lh;
160 }
161
162 /**
163 * The XML document location.
164 */
165 public void setLocation(Location location) {
166 _location = location;
167 }
168
169 /**
170 * Whether to use the source name as the XML system id.
171 */
172 public boolean getSourceIsSystemId() {
173 return _systemId;
174 }
175
176 /**
177 * Whether to use the source name as the XML system id.
178 */
179 public void setSourceIsSystemId(boolean systemId) {
180 _systemId = systemId;
181 }
182
183 /**
184 * Whether this is a validating parser.
185 */
186 public boolean isValidating() {
187 return _validating;
188 }
189
190 /**
191 * Whether this is a validating parser.
192 */
193 public void setValidating(boolean validating) {
194 _validating = validating;
195 }
196
197 /**
198 * Expected suffix for metadata resources, or null if unknown.
199 */
200 public String getSuffix() {
201 return _suffix;
202 }
203
204 /**
205 * Expected suffix for metadata resources, or null if unknown.
206 */
207 public void setSuffix(String suffix) {
208 _suffix = suffix;
209 }
210
211 /**
212 * Whether parsed resource names are cached to avoid duplicate parsing.
213 */
214 public boolean isCaching() {
215 return _caching;
216 }
217
218 /**
219 * Whether parsed resource names are cached to avoid duplicate parsing.
220 */
221 public void setCaching(boolean caching) {
222 _caching = caching;
223 if (!caching)
224 clear();
225 }
226
227 /**
228 * The log to write to.
229 */
230 public Log getLog() {
231 return _log;
232 }
233
234 /**
235 * The log to write to.
236 */
237 public void setLog(Log log) {
238 _log = log;
239 }
240
241 /**
242 * Classloader to use for class name resolution.
243 */
244 public ClassLoader getClassLoader() {
245 return _loader;
246 }
247
248 /**
249 * Classloader to use for class name resolution.
250 */
251 public void setClassLoader(ClassLoader loader) {
252 _loader = loader;
253 }
254
255 public List getResults() {
256 if (_results == null)
257 return Collections.EMPTY_LIST;
258 return _results;
259 }
260
261 public void parse(String rsrc) throws IOException {
262 if (rsrc != null)
263 parse(new ResourceMetaDataIterator(rsrc, _loader));
264 }
265
266 public void parse(URL url) throws IOException {
267 if (url != null)
268 parse(new URLMetaDataIterator(url));
269 }
270
271 public void parse(File file) throws IOException {
272 if (file == null)
273 return;
274 if (!((Boolean) AccessController.doPrivileged(J2DoPrivHelper
275 .isDirectoryAction(file))).booleanValue())
276 parse(new FileMetaDataIterator(file));
277 else {
278 String suff = (_suffix == null) ? "" : _suffix;
279 parse(new FileMetaDataIterator(file,
280 new SuffixMetaDataFilter(suff)));
281 }
282 }
283
284 public void parse(Class cls, boolean topDown) throws IOException {
285 String suff = (_suffix == null) ? "" : _suffix;
286 parse(new ClassMetaDataIterator(cls, suff, topDown), !topDown);
287 }
288
289 public void parse(Reader xml, String sourceName) throws IOException {
290 if (xml != null && (sourceName == null || !parsed(sourceName)))
291 parseNewResource(xml, sourceName);
292 }
293
294 public void parse(MetaDataIterator itr) throws IOException {
295 parse(itr, false);
296 }
297
298 /**
299 * Parse the resources returned by the given iterator, optionally stopping
300 * when the first valid resource is found.
301 */
302 private void parse(MetaDataIterator itr, boolean stopFirst)
303 throws IOException {
304 if (itr == null)
305 return;
306 try {
307 String sourceName;
308 while (itr.hasNext()) {
309 sourceName = itr.next().toString();
310 if (parsed(sourceName)) {
311 if (stopFirst)
312 break;
313 continue;
314 }
315
316 // individual files of the resource might already be parsed
317 _sourceFile = itr.getFile();
318 parseNewResource(new InputStreamReader(itr.getInputStream()),
319 sourceName);
320 if (stopFirst)
321 break;
322 }
323 }
324 finally {
325 itr.close();
326 }
327 }
328
329 /**
330 * Parse a previously-unseen source. All parsing methods delegate
331 * to this one.
332 */
333 protected void parseNewResource(Reader xml, String sourceName)
334 throws IOException {
335 if (_log != null && _log.isTraceEnabled())
336 _log.trace(_loc.get("start-parse", sourceName));
337
338 // even if we want to validate, specify that it won't happen
339 // if we have neither a DocType not a Schema
340 Object schemaSource = getSchemaSource();
341 if (schemaSource != null && _schemaBug) {
342 if (_log != null && _log.isTraceEnabled())
343 _log.trace(_loc.get("parser-schema-bug"));
344 schemaSource = null;
345 }
346 boolean validating = _validating && (getDocType() != null
347 || schemaSource != null);
348
349 // parse the metadata with a SAX parser
350 try {
351 _sourceName = sourceName;
352 SAXParser parser = XMLFactory.getSAXParser(validating, true);
353 Object schema = null;
354 if (validating) {
355 schema = schemaSource;
356 if (schema == null && getDocType() != null)
357 xml = new DocTypeReader(xml, getDocType());
358 }
359
360 if (_parseComments || _lh != null)
361 parser.setProperty
362 ("http://xml.org/sax/properties/lexical-handler", this);
363
364 if (schema != null) {
365 parser.setProperty
366 ("http://java.sun.com/xml/jaxp/properties/schemaLanguage",
367 "http://www.w3.org/2001/XMLSchema");
368 parser.setProperty
369 ("http://java.sun.com/xml/jaxp/properties/schemaSource",
370 schema);
371 }
372
373 InputSource is = new InputSource(xml);
374 if (_systemId && sourceName != null)
375 is.setSystemId(sourceName);
376 parser.parse(is, this);
377 finish();
378 } catch (SAXException se) {
379 IOException ioe = new IOException(se.toString());
380 JavaVersions.initCause(ioe, se);
381 throw ioe;
382 } finally {
383 reset();
384 }
385 }
386
387 /**
388 * Return true if the given source is parsed. Otherwise, record that
389 * it will be parsed.
390 */
391 protected boolean parsed(String src) {
392 if (!_caching)
393 return false;
394 if (_parsed == null)
395 _parsed = new HashMap();
396
397 ClassLoader loader = currentClassLoader();
398 Set set = (Set) _parsed.get(loader);
399 if (set == null) {
400 set = new HashSet();
401 _parsed.put(loader, set);
402 }
403 boolean added = set.add(src);
404 if (!added && _log != null && _log.isTraceEnabled())
405 _log.trace(_loc.get("already-parsed", src));
406 return !added;
407 }
408
409 public void clear() {
410 if (_log != null && _log.isTraceEnabled())
411 _log.trace(_loc.get("clear-parser", this));
412 if (_parsed != null)
413 _parsed.clear();
414 }
415
416 public void error(SAXParseException se) throws SAXException {
417 throw getException(se.toString());
418 }
419
420 public void fatalError(SAXParseException se) throws SAXException {
421 throw getException(se.toString());
422 }
423
424 public void setDocumentLocator(Locator locator) {
425 _location.setLocator(locator);
426 }
427
428 public void startElement(String uri, String name, String qName,
429 Attributes attrs) throws SAXException {
430 _depth++;
431 if (_depth <= _ignore)
432 if (!startElement(qName, attrs))
433 ignoreContent(true);
434 }
435
436 public void endElement(String uri, String name, String qName)
437 throws SAXException {
438 if (_depth < _ignore)
439 endElement(qName);
440 _text = null;
441 if (_comments != null)
442 _comments.clear();
443 if (_depth == _ignore)
444 _ignore = Integer.MAX_VALUE;
445 _depth--;
446 }
447
448 public void characters(char[] ch, int start, int length) {
449 if (_parseText && _depth <= _ignore) {
450 if (_text == null)
451 _text = new StringBuffer();
452 _text.append(ch, start, length);
453 }
454 }
455
456 public void comment(char[] ch, int start, int length) throws SAXException {
457 if (_parseComments && _depth <= _ignore) {
458 if (_comments == null)
459 _comments = new ArrayList(3);
460 _comments.add(String.valueOf(ch, start, length));
461 }
462 if (_lh != null)
463 _lh.comment(ch, start, length);
464 }
465
466 public void startCDATA() throws SAXException {
467 if (_lh != null)
468 _lh.startCDATA();
469 }
470
471 public void endCDATA() throws SAXException {
472 if (_lh != null)
473 _lh.endCDATA();
474 }
475
476 public void startDTD(String name, String publicId, String systemId)
477 throws SAXException {
478 if (_lh != null)
479 _lh.startDTD(name, publicId, systemId);
480 }
481
482 public void endDTD() throws SAXException {
483 if (_lh != null)
484 _lh.endDTD();
485 }
486
487 public void startEntity(String name) throws SAXException {
488 if (_lh != null)
489 _lh.startEntity(name);
490 }
491
492 public void endEntity(String name) throws SAXException {
493 if (_lh != null)
494 _lh.endEntity(name);
495 }
496
/**
 * Override this method marking the start of some element. If this method
 * returns false, the content of the element and the end element event will
 * be ignored. It is not invoked for elements nested in ignored content.
 *
 * @param name the qualified name of the element
 * @param attrs the attributes of the element
 * @return false to ignore the content and end event of the element
 * @throws SAXException to abort the parse
 */
protected abstract boolean startElement(String name, Attributes attrs)
    throws SAXException;
504
/**
 * Override this method marking the end of some element. It is not
 * invoked for elements whose end event is being ignored.
 *
 * @param name the qualified name of the element
 * @throws SAXException to abort the parse
 */
protected abstract void endElement(String name) throws SAXException;
509
510 /**
511 * Add a result to be returned from the current parse.
512 */
513 protected void addResult(Object result) {
514 if (_log != null && _log.isTraceEnabled())
515 _log.trace(_loc.get("add-result", result));
516 _curResults.add(result);
517 }
518
519 /**
520 * Override this method to finish up after a parse; this is only
521 * called if no errors are encountered during parsing. Subclasses should
522 * call <code>super.finish()</code> to resolve superclass state.
523 */
524 protected void finish() {
525 if (_log != null && _log.isTraceEnabled())
526 _log.trace(_loc.get("end-parse", getSourceName()));
527 _results = new ArrayList(_curResults);
528 }
529
530 /**
531 * Override this method to clear any state and ready the parser for
532 * a new document. Subclasses should call
533 * <code>super.reset()</code> to clear superclass state.
534 */
535 protected void reset() {
536 _curResults.clear();
537 _curLoader = null;
538 _sourceName = null;
539 _sourceFile = null;
540 _depth = -1;
541 _ignore = Integer.MAX_VALUE;
542 if (_comments != null)
543 _comments.clear();
544 }
545
546 /**
547 * Implement to return the XML schema source for the document. Returns
548 * null by default. May return:
549 * <ul>
550 * <li><code>String</code> pointing to schema URI.</li>
551 * <li><code>InputStream</code> containing schema contents.</li>
552 * <li><code>InputSource</code> containing schema contents.</li>
553 * <li><code>File</code> containing schema contents.</li>
554 * <li>Array of any of the above elements.</li>
555 * </ul>
556 */
557 protected Object getSchemaSource() throws IOException {
558 return null;
559 }
560
561 /**
562 * Override this method to return any <code>DOCTYPE</code> declaration
563 * that should be dynamically included in xml documents that will be
564 * validated. Returns null by default.
565 */
566 protected Reader getDocType() throws IOException {
567 return null;
568 }
569
570 /**
571 * Return the name of the source file being parsed.
572 */
573 protected String getSourceName() {
574 return _sourceName;
575 }
576
577 /**
578 * Return the file of the source being parsed.
579 */
580 protected File getSourceFile() {
581 return _sourceFile;
582 }
583
584 /**
585 * Add current comments to the given entity. By default, assumes entity
586 * is {@link Commentable}.
587 */
588 protected void addComments(Object obj) {
589 String[] comments = currentComments();
590 if (comments.length > 0 && obj instanceof Commentable)
591 ((Commentable) obj).setComments(comments);
592 }
593
594 /**
595 * Array of comments for the current node, or empty array if none.
596 */
597 protected String[] currentComments() {
598 if (_comments == null || _comments.isEmpty())
599 return Commentable.EMPTY_COMMENTS;
600 return (String[]) _comments.toArray(new String[_comments.size()]);
601 }
602
603 /**
604 * Return the text value within the current node.
605 */
606 protected String currentText() {
607 if (_text == null)
608 return "";
609 return _text.toString().trim();
610 }
611
612 /**
613 * Return the current location within the source file.
614 */
615 protected String currentLocation() {
616 return " [" + _loc.get("loc-prefix") + _location.getLocation() + "]";
617 }
618
619 /**
620 * Return the parse depth. Within the root element, the depth is 0,
621 * within the first nested element, it is 1, and so forth.
622 */
623 protected int currentDepth() {
624 return _depth;
625 }
626
627 /**
628 * Return the class loader to use when resolving resources and loading
629 * classes.
630 */
631 protected ClassLoader currentClassLoader() {
632 if (_loader != null)
633 return _loader;
634 if (_curLoader == null)
635 _curLoader = (ClassLoader) AccessController.doPrivileged(
636 J2DoPrivHelper.getContextClassLoaderAction());
637 return _curLoader;
638 }
639
640 /**
641 * Ignore all content below the current element.
642 *
643 * @param ignoreEnd whether to ignore the end element event
644 */
645 protected void ignoreContent(boolean ignoreEnd) {
646 _ignore = _depth;
647 if (!ignoreEnd)
648 _ignore++;
649 }
650
651 /**
652 * Returns a SAXException with the source file name and the given error
653 * message.
654 */
655 protected SAXException getException(String msg) {
656 return new SAXException(getSourceName() + currentLocation() +
657 ": " + msg);
658 }
659
660 /**
661 * Returns a SAXException with the source file name and the given error
662 * message.
663 */
664 protected SAXException getException(Message msg) {
665 return new SAXException(getSourceName() + currentLocation() +
666 ": " + msg.getMessage());
667 }
668
669 /**
670 * Returns a SAXException with the source file name and the given error
671 * message.
672 */
673 protected SAXException getException(Message msg, Throwable cause) {
674 if (cause != null && _log != null && _log.isTraceEnabled())
675 _log.trace(_loc.get("sax-exception",
676 getSourceName(), _location.getLocation()), cause);
677 SAXException e = new SAXException(getSourceName() + currentLocation() +
678 ": " + msg + " [" + cause + "]");
679 e.initCause(cause);
680 return e;
681 }
682 }