1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.xerces.impl.dtd.models;
19
20 import java.util.HashMap;
21
22 import org.apache.xerces.impl.dtd.XMLContentSpec;
23 import org.apache.xerces.xni.QName;
24
25 /**
26 * DFAContentModel is the derivative of ContentModel that does
27 * all of the non-trivial element content validation. This class does
28 * the conversion from the regular expression to the DFA that
29 * it then uses in its validation algorithm.
30 * <p>
31 * <b>Note:</b> Upstream work insures that this class will never see
32 * a content model with PCDATA in it. Any model with PCDATA is 'mixed'
33 * and is handled via the MixedContentModel class since mixed models
34 * are very constrained in form and easily handled via a special case.
35 * This also makes implementation of this class much easier.
36 *
37 * @xerces.internal
38 *
39 * @version $Id: DFAContentModel.java 572057 2007-09-02 18:03:20Z mrglavas $
40 */
41 public class DFAContentModel
42 implements ContentModelValidator {
43
44 //
45 // Constants
46 //
47 // special strings
48
49 /** Epsilon string. */
50 private static String fEpsilonString = "<<CMNODE_EPSILON>>";
51
52 /** End-of-content string. */
53 private static String fEOCString = "<<CMNODE_EOC>>";
54
55 /** initializing static members **/
56 static {
57 fEpsilonString = fEpsilonString.intern();
58 fEOCString = fEOCString.intern();
59 }
60
61 // debugging
62
63 /** Set to true to debug content model validation. */
64 private static final boolean DEBUG_VALIDATE_CONTENT = false;
65
66 //
67 // Data
68 //
69
70 /* this is the EquivClassComparator object */
71 //private EquivClassComparator comparator = null;
72
73 /**
74 * This is the map of unique input symbol elements to indices into
75 * each state's per-input symbol transition table entry. This is part
76 * of the built DFA information that must be kept around to do the
77 * actual validation.
78 */
79 private QName fElemMap[] = null;
80
81 /**
82 * This is a map of whether the element map contains information
83 * related to ANY models.
84 */
85 private int fElemMapType[] = null;
86
87 /** The element map size. */
88 private int fElemMapSize = 0;
89
90 /** Boolean to distinguish Schema Mixed Content */
91 private boolean fMixed;
92
93 /**
94 * The NFA position of the special EOC (end of content) node. This
95 * is saved away since it's used during the DFA build.
96 */
97 private int fEOCPos = 0;
98
99
100 /**
101 * This is an array of booleans, one per state (there are
102 * fTransTableSize states in the DFA) that indicates whether that
103 * state is a final state.
104 */
105 private boolean fFinalStateFlags[] = null;
106
107 /**
108 * The list of follow positions for each NFA position (i.e. for each
109 * non-epsilon leaf node.) This is only used during the building of
110 * the DFA, and is let go afterwards.
111 */
112 private CMStateSet fFollowList[] = null;
113
114 /**
115 * This is the head node of our intermediate representation. It is
116 * only non-null during the building of the DFA (just so that it
117 * does not have to be passed all around.) Once the DFA is built,
118 * this is no longer required so its nulled out.
119 */
120 private CMNode fHeadNode = null;
121
122 /**
123 * The count of leaf nodes. This is an important number that set some
124 * limits on the sizes of data structures in the DFA process.
125 */
126 private int fLeafCount = 0;
127
128 /**
129 * An array of non-epsilon leaf nodes, which is used during the DFA
130 * build operation, then dropped.
131 */
132 private CMLeaf fLeafList[] = null;
133
134 /** Array mapping ANY types to the leaf list. */
135 private int fLeafListType[] = null;
136
137 //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
138
139 /**
140 * The string pool of our parser session. This is set during construction
141 * and kept around.
142 */
143 //private StringPool fStringPool = null;
144
145 /**
146 * This is the transition table that is the main by product of all
147 * of the effort here. It is an array of arrays of ints. The first
148 * dimension is the number of states we end up with in the DFA. The
149 * second dimensions is the number of unique elements in the content
150 * model (fElemMapSize). Each entry in the second dimension indicates
151 * the new state given that input for the first dimension's start
152 * state.
153 * <p>
154 * The fElemMap array handles mapping from element indexes to
155 * positions in the second dimension of the transition table.
156 */
157 private int fTransTable[][] = null;
158
159 /**
160 * The number of valid entries in the transition table, and in the other
161 * related tables such as fFinalStateFlags.
162 */
163 private int fTransTableSize = 0;
164
165 /**
166 * Flag that indicates that even though we have a "complicated"
167 * content model, it is valid to have no content. In other words,
168 * all parts of the content model are optional. For example:
169 * <pre>
170 * <!ELEMENT AllOptional (Optional*,NotRequired?)>
171 * </pre>
172 */
173 private boolean fEmptyContentIsValid = false;
174
175 // temp variables
176
177 /** Temporary qualified name. */
178 private final QName fQName = new QName();
179
180 //
181 // Constructors
182 //
183
184
185 //
186 // Constructors
187 //
188
189 /**
190 * Constructs a DFA content model.
191 *
192 * @param syntaxTree The syntax tree of the content model.
193 * @param leafCount The number of leaves.
194 * @param mixed
195 *
196 */
197 public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
198 // Store away our index and pools in members
199 //fStringPool = stringPool;
200 fLeafCount = leafCount;
201
202
203 // this is for Schema Mixed Content
204 fMixed = mixed;
205
206 //
207 // Ok, so lets grind through the building of the DFA. This method
208 // handles the high level logic of the algorithm, but it uses a
209 // number of helper classes to do its thing.
210 //
211 // In order to avoid having hundreds of references to the error and
212 // string handlers around, this guy and all of his helper classes
213 // just throw a simple exception and we then pass it along.
214 //
215 buildDFA(syntaxTree);
216 }
217
218 //
219 // ContentModelValidator methods
220 //
221
222 /**
223 * Check that the specified content is valid according to this
224 * content model. This method can also be called to do 'what if'
225 * testing of content models just to see if they would be valid.
226 * <p>
227 * A value of -1 in the children array indicates a PCDATA node. All other
228 * indexes will be positive and represent child elements. The count can be
229 * zero, since some elements have the EMPTY content model and that must be
230 * confirmed.
231 *
232 * @param children The children of this element. Each integer is an index within
233 * the <code>StringPool</code> of the child element name. An index
234 * of -1 is used to indicate an occurrence of non-whitespace character
235 * data.
236 * @param offset Offset into the array where the children starts.
237 * @param length The number of entries in the <code>children</code> array.
238 *
239 * @return The value -1 if fully valid, else the 0 based index of the child
240 * that first failed. If the value returned is equal to the number
241 * of children, then the specified children are valid but additional
242 * content is required to reach a valid ending state.
243 *
244 */
245 public int validate(QName[] children, int offset, int length) {
246
247 if (DEBUG_VALIDATE_CONTENT)
248 System.out.println("DFAContentModel#validateContent");
249
250 //
251 // A DFA content model must *always* have at least 1 child
252 // so a failure is given if no children present.
253 //
254 // Defect 782: This is an incorrect statement because a DFA
255 // content model is also used for constructions such as:
256 //
257 // (Optional*,NotRequired?)
258 //
259 // where a perfectly valid content would be NO CHILDREN.
260 // Therefore, if there are no children, we must check to
261 // see if the CMNODE_EOC marker is a valid start state! -Ac
262 //
263 if (length == 0) {
264 if (DEBUG_VALIDATE_CONTENT) {
265 System.out.println("!!! no children");
266 System.out.println("elemMap="+fElemMap);
267 for (int i = 0; i < fElemMap.length; i++) {
268 String uri = fElemMap[i].uri;
269 String localpart = fElemMap[i].localpart;
270
271 System.out.println("fElemMap["+i+"]="+uri+","+
272 localpart+" ("+
273 uri+", "+
274 localpart+
275 ')');
276
277 }
278 System.out.println("EOCIndex="+fEOCString);
279 }
280
281 return fEmptyContentIsValid ? -1 : 0;
282
283 } // if child count == 0
284
285 //
286 // Lets loop through the children in the array and move our way
287 // through the states. Note that we use the fElemMap array to map
288 // an element index to a state index.
289 //
290 int curState = 0;
291 for (int childIndex = 0; childIndex < length; childIndex++)
292 {
293 // Get the current element index out
294 final QName curElem = children[offset + childIndex];
295 // ignore mixed text
296 if (fMixed && curElem.localpart == null) {
297 continue;
298 }
299
300 // Look up this child in our element map
301 int elemIndex = 0;
302 for (; elemIndex < fElemMapSize; elemIndex++)
303 {
304 int type = fElemMapType[elemIndex] & 0x0f ;
305 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
306 //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
307 if (fElemMap[elemIndex].rawname == curElem.rawname) {
308 break;
309 }
310 }
311 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
312 String uri = fElemMap[elemIndex].uri;
313 if (uri == null || uri == curElem.uri) {
314 break;
315 }
316 }
317 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
318 if (curElem.uri == null) {
319 break;
320 }
321 }
322 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
323 if (fElemMap[elemIndex].uri != curElem.uri) {
324 break;
325 }
326 }
327 }
328
329 // If we didn't find it, then obviously not valid
330 if (elemIndex == fElemMapSize) {
331 if (DEBUG_VALIDATE_CONTENT) {
332 System.out.println("!!! didn't find it");
333
334 System.out.println("curElem : " +curElem );
335 for (int i=0; i<fElemMapSize; i++) {
336 System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
337 System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
338 }
339 }
340
341 return childIndex;
342 }
343
344 //
345 // Look up the next state for this input symbol when in the
346 // current state.
347 //
348 curState = fTransTable[curState][elemIndex];
349
350 // If its not a legal transition, then invalid
351 if (curState == -1) {
352 if (DEBUG_VALIDATE_CONTENT)
353 System.out.println("!!! not a legal transition");
354 return childIndex;
355 }
356 }
357
358 //
359 // We transitioned all the way through the input list. However, that
360 // does not mean that we ended in a final state. So check whether
361 // our ending state is a final state.
362 //
363 if (DEBUG_VALIDATE_CONTENT)
364 System.out.println("curState="+curState+", childCount="+length);
365 if (!fFinalStateFlags[curState])
366 return length;
367
368 // success!
369 return -1;
370 } // validate
371
372
373 //
374 // Private methods
375 //
376
377 /**
378 * Builds the internal DFA transition table from the given syntax tree.
379 *
380 * @param syntaxTree The syntax tree.
381 *
382 * @exception CMException Thrown if DFA cannot be built.
383 */
384 private void buildDFA(CMNode syntaxTree)
385 {
386 //
387 // The first step we need to take is to rewrite the content model
388 // using our CMNode objects, and in the process get rid of any
389 // repetition short cuts, converting them into '*' style repetitions
390 // or getting rid of repetitions altogether.
391 //
392 // The conversions done are:
393 //
394 // x+ -> (x|x*)
395 // x? -> (x|epsilon)
396 //
397 // This is a relatively complex scenario. What is happening is that
398 // we create a top level binary node of which the special EOC value
399 // is set as the right side node. The the left side is set to the
400 // rewritten syntax tree. The source is the original content model
401 // info from the decl pool. The rewrite is done by buildSyntaxTree()
402 // which recurses the decl pool's content of the element and builds
403 // a new tree in the process.
404 //
405 // Note that, during this operation, we set each non-epsilon leaf
406 // node's DFA state position and count the number of such leafs, which
407 // is left in the fLeafCount member.
408 //
409 // The nodeTmp object is passed in just as a temp node to use during
410 // the recursion. Otherwise, we'd have to create a new node on every
411 // level of recursion, which would be piggy in Java (as is everything
412 // for that matter.)
413 //
414
415 /* MODIFIED (Jan, 2001)
416 *
417 * Use following rules.
418 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
419 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
420 *
421 * The same computation of follow as x* is applied to x+
422 *
423 * The modification drastically reduces computation time of
424 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
425 */
426
427 fQName.setValues(null, fEOCString, fEOCString, null);
428 CMLeaf nodeEOC = new CMLeaf(fQName);
429 fHeadNode = new CMBinOp
430 (
431 XMLContentSpec.CONTENTSPECNODE_SEQ
432 , syntaxTree
433 , nodeEOC
434 );
435
436 //
437 // And handle specially the EOC node, which also must be numbered
438 // and counted as a non-epsilon leaf node. It could not be handled
439 // in the above tree build because it was created before all that
440 // started. We save the EOC position since its used during the DFA
441 // building loop.
442 //
443 fEOCPos = fLeafCount;
444 nodeEOC.setPosition(fLeafCount++);
445
446 //
447 // Ok, so now we have to iterate the new tree and do a little more
448 // work now that we know the leaf count. One thing we need to do is
449 // to calculate the first and last position sets of each node. This
450 // is cached away in each of the nodes.
451 //
452 // Along the way we also set the leaf count in each node as the
453 // maximum state count. They must know this in order to create their
454 // first/last pos sets.
455 //
456 // We also need to build an array of references to the non-epsilon
457 // leaf nodes. Since we iterate it in the same way as before, this
458 // will put them in the array according to their position values.
459 //
460 fLeafList = new CMLeaf[fLeafCount];
461 fLeafListType = new int[fLeafCount];
462 postTreeBuildInit(fHeadNode, 0);
463
464 //
465 // And, moving onward... We now need to build the follow position
466 // sets for all the nodes. So we allocate an array of state sets,
467 // one for each leaf node (i.e. each DFA position.)
468 //
469 fFollowList = new CMStateSet[fLeafCount];
470 for (int index = 0; index < fLeafCount; index++)
471 fFollowList[index] = new CMStateSet(fLeafCount);
472 calcFollowList(fHeadNode);
473 //
474 // And finally the big push... Now we build the DFA using all the
475 // states and the tree we've built up. First we set up the various
476 // data structures we are going to use while we do this.
477 //
478 // First of all we need an array of unique element names in our
479 // content model. For each transition table entry, we need a set of
480 // contiguous indices to represent the transitions for a particular
481 // input element. So we need to a zero based range of indexes that
482 // map to element types. This element map provides that mapping.
483 //
484 fElemMap = new QName[fLeafCount];
485 fElemMapType = new int[fLeafCount];
486 fElemMapSize = 0;
487 for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
488 {
489 fElemMap[outIndex] = new QName();
490
491 /****
492 if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
493 if (fLeafNameTypeVector == null) {
494 fLeafNameTypeVector = new ContentLeafNameTypeVector();
495 }
496 }
497 /****/
498
499 // Get the current leaf's element index
500 final QName element = fLeafList[outIndex].getElement();
501
502 // See if the current leaf node's element index is in the list
503 int inIndex = 0;
504 for (; inIndex < fElemMapSize; inIndex++)
505 {
506 if (fElemMap[inIndex].rawname == element.rawname) {
507 break;
508 }
509 }
510
511 // If it was not in the list, then add it, if not the EOC node
512 if (inIndex == fElemMapSize) {
513 fElemMap[fElemMapSize].setValues(element);
514 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
515 fElemMapSize++;
516 }
517 }
518 // set up the fLeafNameTypeVector object if there is one.
519 /*****
520 if (fLeafNameTypeVector != null) {
521 fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
522 }
523
524 /***
525 * Optimization(Jan, 2001); We sort fLeafList according to
526 * elemIndex which is *uniquely* associated to each leaf.
527 * We are *assuming* that each element appears in at least one leaf.
528 **/
529
530 int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
531 int fSortCount = 0;
532
533 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
534 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
535 final QName leaf = fLeafList[leafIndex].getElement();
536 final QName element = fElemMap[elemIndex];
537 if (leaf.rawname == element.rawname) {
538 fLeafSorter[fSortCount++] = leafIndex;
539 }
540 }
541 fLeafSorter[fSortCount++] = -1;
542 }
543
544 /* Optimization(Jan, 2001) */
545
546 //
547 // Next lets create some arrays, some that that hold transient
548 // information during the DFA build and some that are permament.
549 // These are kind of sticky since we cannot know how big they will
550 // get, but we don't want to use any Java collections because of
551 // performance.
552 //
553 // Basically they will probably be about fLeafCount*2 on average,
554 // but can be as large as 2^(fLeafCount*2), worst case. So we start
555 // with fLeafCount*4 as a middle ground. This will be very unlikely
556 // to ever have to expand, though it if does, the overhead will be
557 // somewhat ugly.
558 //
559 int curArraySize = fLeafCount * 4;
560 CMStateSet[] statesToDo = new CMStateSet[curArraySize];
561 fFinalStateFlags = new boolean[curArraySize];
562 fTransTable = new int[curArraySize][];
563
564 //
565 // Ok we start with the initial set as the first pos set of the
566 // head node (which is the seq node that holds the content model
567 // and the EOC node.)
568 //
569 CMStateSet setT = fHeadNode.firstPos();
570
571 //
572 // Init our two state flags. Basically the unmarked state counter
573 // is always chasing the current state counter. When it catches up,
574 // that means we made a pass through that did not add any new states
575 // to the lists, at which time we are done. We could have used a
576 // expanding array of flags which we used to mark off states as we
577 // complete them, but this is easier though less readable maybe.
578 //
579 int unmarkedState = 0;
580 int curState = 0;
581
582 //
583 // Init the first transition table entry, and put the initial state
584 // into the states to do list, then bump the current state.
585 //
586 fTransTable[curState] = makeDefStateList();
587 statesToDo[curState] = setT;
588 curState++;
589
590 /* Optimization(Jan, 2001); This is faster for
591 * a large content model such as, "(t001+|t002+|.... |t500+)".
592 */
593
594 HashMap stateTable = new HashMap();
595
596 /* Optimization(Jan, 2001) */
597
598 //
599 // Ok, almost done with the algorithm... We now enter the
600 // loop where we go until the states done counter catches up with
601 // the states to do counter.
602 //
603 while (unmarkedState < curState)
604 {
605 //
606 // Get the first unmarked state out of the list of states to do.
607 // And get the associated transition table entry.
608 //
609 setT = statesToDo[unmarkedState];
610 int[] transEntry = fTransTable[unmarkedState];
611
612 // Mark this one final if it contains the EOC state
613 fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
614
615 // Bump up the unmarked state count, marking this state done
616 unmarkedState++;
617
618 // Loop through each possible input symbol in the element map
619 CMStateSet newSet = null;
620 /* Optimization(Jan, 2001) */
621 int sorterIndex = 0;
622 /* Optimization(Jan, 2001) */
623 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
624 {
625 //
626 // Build up a set of states which is the union of all of
627 // the follow sets of DFA positions that are in the current
628 // state. If we gave away the new set last time through then
629 // create a new one. Otherwise, zero out the existing one.
630 //
631 if (newSet == null)
632 newSet = new CMStateSet(fLeafCount);
633 else
634 newSet.zeroBits();
635
636 /* Optimization(Jan, 2001) */
637 int leafIndex = fLeafSorter[sorterIndex++];
638
639 while (leafIndex != -1) {
640 // If this leaf index (DFA position) is in the current set...
641 if (setT.getBit(leafIndex))
642 {
643 //
644 // If this leaf is the current input symbol, then we
645 // want to add its follow list to the set of states to
646 // transition to from the current state.
647 //
648 newSet.union(fFollowList[leafIndex]);
649 }
650
651 leafIndex = fLeafSorter[sorterIndex++];
652 }
653 /* Optimization(Jan, 2001) */
654
655 //
656 // If this new set is not empty, then see if its in the list
657 // of states to do. If not, then add it.
658 //
659 if (!newSet.isEmpty())
660 {
661 //
662 // Search the 'states to do' list to see if this new
663 // state set is already in there.
664 //
665
666 /* Optimization(Jan, 2001) */
667 Integer stateObj = (Integer)stateTable.get(newSet);
668 int stateIndex = (stateObj == null ? curState : stateObj.intValue());
669 /* Optimization(Jan, 2001) */
670
671 // If we did not find it, then add it
672 if (stateIndex == curState)
673 {
674 //
675 // Put this new state into the states to do and init
676 // a new entry at the same index in the transition
677 // table.
678 //
679 statesToDo[curState] = newSet;
680 fTransTable[curState] = makeDefStateList();
681
682 /* Optimization(Jan, 2001) */
683 stateTable.put(newSet, new Integer(curState));
684 /* Optimization(Jan, 2001) */
685
686 // We now have a new state to do so bump the count
687 curState++;
688
689 //
690 // Null out the new set to indicate we adopted it.
691 // This will cause the creation of a new set on the
692 // next time around the loop.
693 //
694 newSet = null;
695 }
696
697 //
698 // Now set this state in the transition table's entry
699 // for this element (using its index), with the DFA
700 // state we will move to from the current state when we
701 // see this input element.
702 //
703 transEntry[elemIndex] = stateIndex;
704
705 // Expand the arrays if we're full
706 if (curState == curArraySize)
707 {
708 //
709 // Yikes, we overflowed the initial array size, so
710 // we've got to expand all of these arrays. So adjust
711 // up the size by 50% and allocate new arrays.
712 //
713 final int newSize = (int)(curArraySize * 1.5);
714 CMStateSet[] newToDo = new CMStateSet[newSize];
715 boolean[] newFinalFlags = new boolean[newSize];
716 int[][] newTransTable = new int[newSize][];
717
718 // Copy over all of the existing content
719 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
720 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
721 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
722
723 // Store the new array size
724 curArraySize = newSize;
725 statesToDo = newToDo;
726 fFinalStateFlags = newFinalFlags;
727 fTransTable = newTransTable;
728 }
729 }
730 }
731 }
732
733 // Check to see if we can set the fEmptyContentIsValid flag.
734 fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
735
736 //
737 // And now we can say bye bye to the temp representation since we've
738 // built the DFA.
739 //
740 if (DEBUG_VALIDATE_CONTENT)
741 dumpTree(fHeadNode, 0);
742 fHeadNode = null;
743 fLeafList = null;
744 fFollowList = null;
745
746 }
747
748 /**
749 * Calculates the follow list of the current node.
750 *
751 * @param nodeCur The curent node.
752 *
753 * @exception CMException Thrown if follow list cannot be calculated.
754 */
755 private void calcFollowList(CMNode nodeCur)
756 {
757 // Recurse as required
758 if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
759 {
760 // Recurse only
761 calcFollowList(((CMBinOp)nodeCur).getLeft());
762 calcFollowList(((CMBinOp)nodeCur).getRight());
763 }
764 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
765 {
766 // Recurse first
767 calcFollowList(((CMBinOp)nodeCur).getLeft());
768 calcFollowList(((CMBinOp)nodeCur).getRight());
769
770 //
771 // Now handle our level. We use our left child's last pos
772 // set and our right child's first pos set, so go ahead and
773 // get them ahead of time.
774 //
775 final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos();
776 final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
777
778 //
779 // Now, for every position which is in our left child's last set
780 // add all of the states in our right child's first set to the
781 // follow set for that position.
782 //
783 for (int index = 0; index < fLeafCount; index++)
784 {
785 if (last.getBit(index))
786 fFollowList[index].union(first);
787 }
788 }
789 /***
790 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
791 {
792 // Recurse first
793 calcFollowList(((CMUniOp)nodeCur).getChild());
794
795 //
796 // Now handle our level. We use our own first and last position
797 // sets, so get them up front.
798 //
799 final CMStateSet first = nodeCur.firstPos();
800 final CMStateSet last = nodeCur.lastPos();
801
802 //
803 // For every position which is in our last position set, add all
804 // of our first position states to the follow set for that
805 // position.
806 //
807 for (int index = 0; index < fLeafCount; index++)
808 {
809 if (last.getBit(index))
810 fFollowList[index].union(first);
811 }
812 }
813 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
814 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
815 {
816 throw new RuntimeException("ImplementationMessages.VAL_NIICM");
817 }
818 /***/
819 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
820 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
821 {
822 // Recurse first
823 calcFollowList(((CMUniOp)nodeCur).getChild());
824
825 //
826 // Now handle our level. We use our own first and last position
827 // sets, so get them up front.
828 //
829 final CMStateSet first = nodeCur.firstPos();
830 final CMStateSet last = nodeCur.lastPos();
831
832 //
833 // For every position which is in our last position set, add all
834 // of our first position states to the follow set for that
835 // position.
836 //
837 for (int index = 0; index < fLeafCount; index++)
838 {
839 if (last.getBit(index))
840 fFollowList[index].union(first);
841 }
842 }
843
844 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
845 // Recurse only
846 calcFollowList(((CMUniOp)nodeCur).getChild());
847 }
848 /***/
849 }
850
851 /**
852 * Dumps the tree of the current node to standard output.
853 *
854 * @param nodeCur The current node.
855 * @param level The maximum levels to output.
856 *
857 * @exception CMException Thrown on error.
858 */
859 private void dumpTree(CMNode nodeCur, int level)
860 {
861 for (int index = 0; index < level; index++)
862 System.out.print(" ");
863
864 int type = nodeCur.type();
865 if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
866 || (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
867 {
868 if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
869 System.out.print("Choice Node ");
870 else
871 System.out.print("Seq Node ");
872
873 if (nodeCur.isNullable())
874 System.out.print("Nullable ");
875
876 System.out.print("firstPos=");
877 System.out.print(nodeCur.firstPos().toString());
878 System.out.print(" lastPos=");
879 System.out.println(nodeCur.lastPos().toString());
880
881 dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
882 dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
883 }
884 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
885 {
886 System.out.print("Rep Node ");
887
888 if (nodeCur.isNullable())
889 System.out.print("Nullable ");
890
891 System.out.print("firstPos=");
892 System.out.print(nodeCur.firstPos().toString());
893 System.out.print(" lastPos=");
894 System.out.println(nodeCur.lastPos().toString());
895
896 dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
897 }
898 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
899 {
900 System.out.print
901 (
902 "Leaf: (pos="
903 + ((CMLeaf)nodeCur).getPosition()
904 + "), "
905 + ((CMLeaf)nodeCur).getElement()
906 + "(elemIndex="
907 + ((CMLeaf)nodeCur).getElement()
908 + ") "
909 );
910
911 if (nodeCur.isNullable())
912 System.out.print(" Nullable ");
913
914 System.out.print("firstPos=");
915 System.out.print(nodeCur.firstPos().toString());
916 System.out.print(" lastPos=");
917 System.out.println(nodeCur.lastPos().toString());
918 }
919 else
920 {
921 throw new RuntimeException("ImplementationMessages.VAL_NIICM");
922 }
923 }
924
925
926 /**
927 * -1 is used to represent bad transitions in the transition table
928 * entry for each state. So each entry is initialized to an all -1
929 * array. This method creates a new entry and initializes it.
930 */
931 private int[] makeDefStateList()
932 {
933 int[] retArray = new int[fElemMapSize];
934 for (int index = 0; index < fElemMapSize; index++)
935 retArray[index] = -1;
936 return retArray;
937 }
938
939 /** Post tree build initialization. */
940 private int postTreeBuildInit(CMNode nodeCur, int curIndex)
941 {
942 // Set the maximum states on this node
943 nodeCur.setMaxStates(fLeafCount);
944
945 // Recurse as required
946 if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
947 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
948 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
949 // REVISIT: Don't waste these structures.
950 QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
951 fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
952 fLeafListType[curIndex] = nodeCur.type();
953 curIndex++;
954 }
955 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
956 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
957 {
958 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
959 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
960 }
961 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
962 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
963 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
964 {
965 curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
966 }
967 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
968 {
969 //
970 // Put this node in the leaf list at the current index if its
971 // a non-epsilon leaf.
972 //
973 final QName node = ((CMLeaf)nodeCur).getElement();
974 if (node.localpart != fEpsilonString) {
975 fLeafList[curIndex] = (CMLeaf)nodeCur;
976 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
977 curIndex++;
978 }
979 }
980 else
981 {
982 throw new RuntimeException("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
983 }
984 return curIndex;
985 }
986
987 } // class DFAContentModel