Source code: org/outerj/pollo/xmleditor/schema/MsvSchema.java
1 package org.outerj.pollo.xmleditor.schema;
2
3 import com.sun.msv.grammar.*;
4 import com.sun.msv.grammar.util.ExpressionWalker;
5 import com.sun.msv.reader.dtd.DTDReader;
6 import com.sun.msv.reader.util.GrammarLoader;
7 import com.sun.msv.reader.util.IgnoreController;
8 import com.sun.msv.verifier.DocumentDeclaration;
9 import com.sun.msv.verifier.ErrorInfo;
10 import com.sun.msv.verifier.ValidityViolation;
11 import com.sun.msv.verifier.regexp.ExpressionAcceptor;
12 import com.sun.msv.verifier.regexp.REDocumentDeclaration;
13 import com.sun.msv.datatype.xsd.EnumerationFacet;
14 import com.sun.msv.datatype.xsd.StringType;
15 import org.jaxen.SimpleNamespaceContext;
16 import org.outerj.pollo.util.URLFactory;
17 import org.outerj.pollo.xmleditor.exception.PolloException;
18 import org.outerj.pollo.xmleditor.schema.msv.PolloMsvVerifier;
19 import org.outerj.pollo.xmleditor.schema.msv.SAXEventGenerator;
20 import org.outerj.pollo.xmleditor.schema.msv.StopValidationException;
21 import org.outerj.pollo.xmleditor.util.NodeMap;
22 import org.w3c.dom.Document;
23 import org.w3c.dom.Element;
24 import org.w3c.dom.Node;
25 import org.xml.sax.*;
26
27 import javax.xml.parsers.SAXParserFactory;
28 import java.util.*;
29
30 /**
31 * An ISchema implementation using MSV. The schema is read and then flattened
32 * out to the same level as the BasicSchema implementation, meaning information
33 * about content model, required attributes, etc. is ignored. The MSV grammar
34 * is kept, however, for doing validation (FIXME note to myself: these need to be
35 * cached and shared between open files).
36 *
37 * @author Bruno Dumon, inspired by work by Al Byers.
38 */
39 public class MsvSchema implements ISchema
40 {
// Namespace context created together with the schema; not referenced by the code visible in this class.
41 protected SimpleNamespaceContext namespaceContext = new SimpleNamespaceContext();
// Flattened schema filled in by init(): ElementSchema objects stored by (namespaceURI, localName).
42 protected NodeMap elementSchemas;
// Value of the "source" init-param read by init(), i.e. where the schema is loaded from.
43 protected String source;
// The MSV grammar loaded by init(); initializeVerifier() builds a document declaration from it.
44 protected Grammar grammar;
// Verifier and its error handler; both are replaced by initializeVerifier() on every call.
45 protected PolloMsvVerifier verifier;
46 protected MsvSchemaErrorHandler errorHandler;
47
48
49 /**
50 * Builds elementSchemas NodeMap (as with Basic Schema).
51 */
52 protected void init(HashMap initParams)
53 throws Exception
54 {
55 try
56 {
57 source = (String) initParams.get("source");
58
59 if (source == null || source.trim().equals(""))
60 {
61 throw new PolloException("[MsvSchema] The source init-param is not specified!");
62 }
63
64 // load grammar
65 SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
66 saxParserFactory.setNamespaceAware(true);
67
68 boolean trackOptionalAttributes = false;
69 InputSource inputSource = new InputSource(URLFactory.createUrl(source).toExternalForm());
70 String type = (String)initParams.get("type");
71 if (type != null && type.equals("dtd"))
72 {
73 grammar = DTDReader.parse(inputSource, new PolloMsvController());
74 trackOptionalAttributes = true;
75 }
76 else
77 {
78 // let MSV autodetect
79 grammar = GrammarLoader.loadSchema(inputSource, new PolloMsvController(), saxParserFactory);
80 }
81
82 if (grammar == null)
83 throw new PolloException("[MsvSchema] Could not load the schema with MSV (parse or loadSchema returned null)");
84
85 elementSchemas = new NodeMap();
86
87 MsvGrammarWalker walker = new MsvGrammarWalker(trackOptionalAttributes);
88 grammar.getTopLevel().visit(walker);
89
90 }
91 catch (Exception e)
92 {
93 throw new PolloException("[MsvSchema] Error initialising MSV schema", e);
94 }
95 }
96
97 /**
98 * The goals of MsvGrammarWalker is to convert an MSV Grammar object
99 * to Pollo's own simple schema-structure.
100 */
101 public class MsvGrammarWalker extends ExpressionWalker
102 {
103 final Set visitedNodes = new HashSet();
104 final Set visitedSubNodes = new HashSet();
105 ElementSchema elementSchema; // the current ElementSchema being created
106 final SubNodeWalker subNodeWalker = new SubNodeWalker();
107 final boolean trackOptionalAttributes;
108
109 /**
110 * @param trackOptionalAttributes in general, should only be used for DTD's
111 * because for other schema languages the content model for attributes can be more complex,
112 * or the same element name may be reused with different required attributes on it.
113 */
114 public MsvGrammarWalker(boolean trackOptionalAttributes)
115 {
116 this.trackOptionalAttributes = trackOptionalAttributes;
117 }
118
119 public void onElement(ElementExp exp)
120 {
121 // if we havn't visited this expression before (can be cyclic!)
122 if (!visitedNodes.contains(exp))
123 {
124 visitedNodes.add(exp);
125 String[] names = getNameParts(exp.getNameClass());
126 if (names != null)
127 {
128 elementSchema = (ElementSchema) elementSchemas.get(names[0], names[1]);
129 if (elementSchema == null)
130 {
131 elementSchema = new ElementSchema();
132 elementSchema.namespaceURI = names[0];
133 elementSchema.localName = names[1];
134 elementSchemas.put(names[0], names[1], elementSchema);
135 }
136
137 // find out what the subelements of this element are:
138 visitedSubNodes.clear();
139 exp.contentModel.visit(subNodeWalker);
140 }
141 else
142 {
143 // other name classes not yet supported
144 }
145
146 // now recursively visit the childexpressions
147 super.onElement(exp);
148 }
149 }
150
151 /**
152 * This walker records all elements that can appear as subelement, and
153 * all attributes that the element can have.
154 */
155 public class SubNodeWalker extends ExpressionWalker
156 {
157 protected AttributeSchema attrSchema;
158 protected final HashSet attrValues = new HashSet();
159 protected final AttributeWalker attrWalker = new AttributeWalker();
160 protected boolean nextAttributeOptional = false;
161
162 public void onElement(ElementExp subExp)
163 {
164 if (!visitedSubNodes.contains(subExp))
165 {
166 visitedSubNodes.add(subExp);
167 String[] names = getNameParts(subExp.getNameClass());
168 if (names != null)
169 {
170 // if a subelement with this name does not yet exist
171 if (!elementSchema.containsSubElement(names[0], names[1]))
172 {
173 ElementSchema.SubElement subElement = elementSchema.createSubElement(names[0], names[1]);
174 elementSchema.subelements.put(names[0], names[1], subElement);
175 }
176 }
177 else
178 {
179 // other name classes not yet supported
180 }
181 // sub expressions are not further visited
182 }
183 }
184
185 public void onAttribute(AttributeExp attrExp)
186 {
187 String[] names = getNameParts(attrExp.getNameClass());
188 if (names != null)
189 {
190 // if an attribute with this name does not yet exist
191 if (elementSchema.getAttributeSchema(names[0], names[1]) == null)
192 {
193 attrSchema = new AttributeSchema(names[0], names[1], null, null);
194
195 if (trackOptionalAttributes)
196 {
197 attrSchema.required = !nextAttributeOptional;
198 nextAttributeOptional = false;
199 }
200
201 // search possible values for this attribute by using the AttributeWalker
202 attrValues.clear();
203 attrExp.exp.visit(attrWalker);
204 if (attrValues.size() > 0)
205 attrSchema.values = (String [])attrValues.toArray(new String [] {});
206 elementSchema.attributes.add(attrSchema);
207 }
208 }
209 }
210
211 public void onChoice(ChoiceExp choiceExp)
212 {
213 if (trackOptionalAttributes)
214 {
215 if (choiceExp.exp1 == Expression.epsilon)
216 {
217 if (choiceExp.exp2 instanceof AttributeExp)
218 {
219 nextAttributeOptional = true;
220 choiceExp.exp2.visit(this);
221 return;
222 }
223 }
224 else if (choiceExp.exp2 == Expression.epsilon)
225 {
226 if (choiceExp.exp1 instanceof AttributeExp)
227 {
228 nextAttributeOptional = true;
229 choiceExp.exp1.visit(this);
230 return;
231 }
232 }
233 }
234 super.onChoice(choiceExp);
235 }
236
237 public void onValue(ValueExp exp)
238 {
239 elementSchema.subtexts.add(exp.value);
240 }
241
242 public void onData(DataExp exp)
243 {
244 if (exp.getType() instanceof EnumerationFacet)
245 {
246 Iterator it = ((EnumerationFacet)exp.getType()).values.iterator();
247 while (it.hasNext())
248 {
249 String value = it.next().toString();
250 elementSchema.subtexts.add(value);
251 }
252 }
253 else if (exp.getType() instanceof StringType)
254 {
255 elementSchema.subtexts.add("");
256 }
257 }
258
259 public void onAnyString()
260 {
261 elementSchema.subtexts.add("");
262 }
263
264 /**
265 * AttributeWalker finds values an attribute may have. Note that this does
266 * not work for e.g. DTD's because there the possible attribute values are
267 * part of the datatype. Works fine for RELAX-NG though.
268 */
269 public class AttributeWalker extends ExpressionWalker
270 {
271 public void onElement(ElementExp exp)
272 {
273 // avoid recursing through elements (should not occur inside an attribute anyhow).
274 }
275
276 public void onValue(ValueExp exp)
277 {
278 attrValues.add(exp.value.toString());
279 }
280
281 public void onData(DataExp exp)
282 {
283 if (exp.getType() instanceof EnumerationFacet)
284 {
285 Iterator it = ((EnumerationFacet)exp.getType()).values.iterator();
286 while (it.hasNext())
287 {
288 attrValues.add(it.next().toString());
289 }
290 }
291 }
292 }
293 }
294 }
295
296
297 private final String[] getNameParts(NameClass nameClass)
298 {
299 if (nameClass instanceof SimpleNameClass)
300 {
301 SimpleNameClass simple = (SimpleNameClass) nameClass;
302
303 // pollo expects that no namespace == null string
304 String namespaceURI = simple.namespaceURI;
305 if (namespaceURI.equals(""))
306 namespaceURI = null;
307
308 return new String[]{namespaceURI, simple.localName};
309 }
310 else
311 return null;
312 }
313
314 public class PolloMsvController extends IgnoreController
315 {
316 public void warning(Locator[] locs, String errorMessage)
317 {
318 System.out.println("MSV warning: " + errorMessage);
319 }
320
321 public void error(Locator[] locs, String errorMessage, Exception nestedException)
322 {
323 System.out.println("MSV error: " + errorMessage);
324 }
325 }
326
327
328 //
329 // The remaining methods are copied from BasicSchema.
330 //
331
332 /**
333 * Returns the list of attributes an element can have.
334 */
335 public Collection getAttributesFor(Element element)
336 {
337 ElementSchema elementSchema = getElementSchema(element.getNamespaceURI(), element.getLocalName());
338
339 if (elementSchema == null)
340 return new LinkedList();
341 else
342 return elementSchema.attributes;
343 }
344
345 /**
346 * Returns true if the element <i>child</i> is allowed as child
347 * of the element <i>parent</i>.
348 */
349 public boolean isChildAllowed(Element parent, Element child)
350 {
351 ElementSchema elementSchema = getElementSchema(parent.getNamespaceURI(), parent.getLocalName());
352 if (elementSchema != null)
353 {
354 return elementSchema.isAllowedAsSubElement(child);
355 }
356 else
357 {
358 return false;
359 }
360 }
361
362 /**
363 * Returns an array containing a list of possible values an attribute can have,
364 * or null if such a list is not available.
365 */
366 public String[] getPossibleAttributeValues(Element element, String namespaceURI, String localName)
367 {
368 AttributeSchema attrSchema = getAttributeSchema(element, namespaceURI, localName);
369 if (attrSchema != null)
370 {
371 return attrSchema.getPossibleValues(element);
372 }
373 return null;
374 }
375
376
377 public Collection getAllowedSubElements(Element element)
378 {
379 ElementSchema elementSchema = getElementSchema(element.getNamespaceURI(), element.getLocalName());
380 if (elementSchema != null)
381 {
382 return elementSchema.subelements.values();
383 }
384 else
385 {
386 return Collections.EMPTY_LIST;
387 }
388 }
389
390 public Collection getAllowedSubTexts(Element element)
391 {
392 ElementSchema elementSchema = getElementSchema(element.getNamespaceURI(), element.getLocalName());
393 if (elementSchema != null)
394 {
395 return elementSchema.subtexts;
396 }
397 else
398 {
399 return Collections.EMPTY_LIST;
400 }
401 }
402
403 protected ElementSchema getElementSchema(String namespaceURI, String localName)
404 {
405 return (ElementSchema) elementSchemas.get(namespaceURI, localName);
406 }
407
408 protected AttributeSchema getAttributeSchema(Element element, String namespaceURI, String localName)
409 {
410 ElementSchema elementSchema = getElementSchema(element.getNamespaceURI(), element.getLocalName());
411
412 if (elementSchema == null)
413 return null;
414 else
415 return elementSchema.getAttributeSchema(namespaceURI, localName);
416 }
417
418 public Collection validate(Document document)
419 throws ValidationNotSupportedException, Exception
420 {
421 initializeVerifier();
422
423 SAXEventGenerator generator = new SAXEventGenerator(document);
424 errorHandler.errorCollection = new ArrayList();
425 errorHandler.generator = generator;
426 verifier.setStopNow(false);
427
428 generator.makeEvent(verifier);
429
430 return errorHandler.errorCollection;
431 }
432
433
434 /**
435 * Verifies the document up to the given element, and then returns the
436 * Expression describing the content model that the verifiers expects
437 * for that element.
438 *
439 * <p>
440 * Implementation note: this depends on the classes PolloMsvVerifier and
441 * SAXEventGenerator to stop the validation at a certain node.
442 */
443 public Expression getExpression(Document document, Element element)
444 throws Exception
445 {
446 initializeVerifier();
447
448 SAXEventGenerator generator = new SAXEventGenerator(document);
449 generator.stopAtNode(element);
450 errorHandler.errorCollection = new ArrayList();
451 errorHandler.generator = generator;
452 verifier.setStopNow(false);
453
454 // validate up to the wanted node
455 try
456 {
457 generator.makeEvent(verifier);
458 }
459 catch (StopValidationException e)
460 {
461 ExpressionAcceptor acceptor = (ExpressionAcceptor) e.getAcceptor();
462 Expression expression = acceptor.getExpression();
463
464 verifier.setStopNow(false);
465 return expression;
466 }
467 catch (Exception e)
468 {
469 verifier.setStopNow(false);
470 throw e;
471 }
472
473 verifier.setStopNow(false);
474 throw new PolloException("[MsvSchema] Could not find expression for the wanted node.");
475 }
476
477 protected void initializeVerifier()
478 {
479 // normally verifiers should be reusable but i noticed some problems that dissappeard
480 // with not reusing them
481 //if (verifier == null)
482 //{
483 DocumentDeclaration documentDeclaration = new REDocumentDeclaration(grammar);
484 this.errorHandler = new MsvSchemaErrorHandler();
485 this.verifier = new PolloMsvVerifier(documentDeclaration, errorHandler);
486 //}
487 }
488
489 public class MsvSchemaErrorHandler implements ErrorHandler
490 {
491 public Collection errorCollection;
492 public SAXEventGenerator generator;
493
494 public void error(SAXParseException e) throws SAXException
495 {
496 addError(e);
497 }
498
499 public void fatalError(SAXParseException e) throws SAXException
500 {
501 addError(e);
502 }
503
504 public void warning(SAXParseException e) throws SAXException
505 {
506 addError(e);
507 }
508
509 protected void addError(SAXParseException e)
510 {
511 Node location = null;
512 String attrNamespaceURI = null;
513 String attrLocalName = null;
514
515 if (e instanceof ValidityViolation)
516 {
517 ErrorInfo errorInfo = ((ValidityViolation) e).getErrorInfo();
518 if (errorInfo instanceof ErrorInfo.BadText)
519 {
520 location = generator.getLastCharacterData();
521 }
522 else
523 {
524 location = generator.getLastElement();
525 if (errorInfo instanceof ErrorInfo.BadAttribute)
526 {
527 ErrorInfo.BadAttribute badAttr = (ErrorInfo.BadAttribute) errorInfo;
528 attrNamespaceURI = badAttr.attNamespaceURI;
529 attrLocalName = badAttr.attLocalName;
530 }
531 }
532 }
533 errorCollection.add(new ValidationErrorInfo(location, e.getMessage(), attrNamespaceURI, attrLocalName));
534 }
535 }
536 }