Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » dtd » models » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.xerces.impl.dtd.models;
   19   
   20   import java.util.HashMap;
   21   
   22   import org.apache.xerces.impl.dtd.XMLContentSpec;
   23   import org.apache.xerces.xni.QName;
   24   
   25   /**
   26    * DFAContentModel is the derivative of ContentModel that does
   27    * all of the non-trivial element content validation. This class does 
   28    * the conversion from the regular expression to the DFA that 
   29    * it then uses in its validation algorithm.
   30    * <p>
   31    * <b>Note:</b> Upstream work insures that this class will never see
   32    * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 
   33    * and is handled via the MixedContentModel class since mixed models 
   34    * are very constrained in form and easily handled via a special case. 
   35    * This also makes implementation of this class much easier.
   36    * 
   37    * @xerces.internal
   38    * 
   39    * @version $Id: DFAContentModel.java 572057 2007-09-02 18:03:20Z mrglavas $
   40    */
   41   public class DFAContentModel
   42       implements ContentModelValidator {
   43   
   44       //
   45       // Constants
   46       //
   47       // special strings
   48   
   49       /** Epsilon string. */
   50       private static String fEpsilonString = "<<CMNODE_EPSILON>>";
   51   
   52       /** End-of-content string. */
   53       private static String fEOCString = "<<CMNODE_EOC>>";
   54   
   55       /** initializing static members **/
   56       static {
   57           fEpsilonString = fEpsilonString.intern();
   58           fEOCString = fEOCString.intern();
   59       }
   60   
   61       // debugging
   62   
   63       /** Set to true to debug content model validation. */
   64       private static final boolean DEBUG_VALIDATE_CONTENT = false;
   65   
   66       //
   67       // Data
   68       //
   69   
   70       /* this is the EquivClassComparator object */
   71       //private EquivClassComparator comparator = null;
   72   
   73       /**
   74        * This is the map of unique input symbol elements to indices into
   75        * each state's per-input symbol transition table entry. This is part
   76        * of the built DFA information that must be kept around to do the
   77        * actual validation.
   78        */
   79       private QName fElemMap[] = null;
   80   
   81       /**
   82        * This is a map of whether the element map contains information 
   83        * related to ANY models.
   84        */
   85       private int fElemMapType[] = null;
   86   
   87       /** The element map size. */
   88       private int fElemMapSize = 0;
   89   
   90       /** Boolean to distinguish Schema Mixed Content */
   91       private boolean fMixed;
   92   
   93       /**
   94        * The NFA position of the special EOC (end of content) node. This
   95        * is saved away since it's used during the DFA build.
   96        */
   97       private int fEOCPos = 0;
   98   
   99   
  100       /**
  101        * This is an array of booleans, one per state (there are
  102        * fTransTableSize states in the DFA) that indicates whether that
  103        * state is a final state.
  104        */
  105       private boolean fFinalStateFlags[] = null;
  106   
  107       /**
  108        * The list of follow positions for each NFA position (i.e. for each
  109        * non-epsilon leaf node.) This is only used during the building of
  110        * the DFA, and is let go afterwards.
  111        */
  112       private CMStateSet fFollowList[] = null;
  113   
  114       /**
  115        * This is the head node of our intermediate representation. It is
  116        * only non-null during the building of the DFA (just so that it
  117        * does not have to be passed all around.) Once the DFA is built,
  118        * this is no longer required so its nulled out.
  119        */
  120       private CMNode fHeadNode = null;
  121   
  122       /**
  123        * The count of leaf nodes. This is an important number that set some
  124        * limits on the sizes of data structures in the DFA process.
  125        */
  126       private int fLeafCount = 0;
  127   
  128       /**
  129        * An array of non-epsilon leaf nodes, which is used during the DFA
  130        * build operation, then dropped.
  131        */
  132       private CMLeaf fLeafList[] = null;
  133   
  134       /** Array mapping ANY types to the leaf list. */
  135       private int fLeafListType[] = null;
  136   
  137       //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
  138   
  139       /**
  140        * The string pool of our parser session. This is set during construction
  141        * and kept around.
  142        */
  143       //private StringPool fStringPool = null;
  144   
  145       /**
  146        * This is the transition table that is the main by product of all
  147        * of the effort here. It is an array of arrays of ints. The first
  148        * dimension is the number of states we end up with in the DFA. The
  149        * second dimensions is the number of unique elements in the content
  150        * model (fElemMapSize). Each entry in the second dimension indicates
  151        * the new state given that input for the first dimension's start
  152        * state.
  153        * <p>
  154        * The fElemMap array handles mapping from element indexes to
  155        * positions in the second dimension of the transition table.
  156        */
  157       private int fTransTable[][] = null;
  158   
  159       /**
  160        * The number of valid entries in the transition table, and in the other
  161        * related tables such as fFinalStateFlags.
  162        */
  163       private int fTransTableSize = 0;
  164   
  165       /**
  166        * Flag that indicates that even though we have a "complicated"
  167        * content model, it is valid to have no content. In other words,
  168        * all parts of the content model are optional. For example:
  169        * <pre>
  170        *      &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
  171        * </pre>
  172        */
  173       private boolean fEmptyContentIsValid = false;
  174   
  175       // temp variables
  176   
  177       /** Temporary qualified name. */
  178       private final QName fQName = new QName();
  179   
  180       //
  181       // Constructors
  182       //
  183   
  184   
  185       //
  186       // Constructors
  187       //
  188   
  189       /**
  190        * Constructs a DFA content model.
  191        *
  192        * @param syntaxTree    The syntax tree of the content model.
  193        * @param leafCount     The number of leaves.
  194        * @param mixed
  195        *
  196        */
  197       public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
  198           // Store away our index and pools in members
  199           //fStringPool = stringPool;
  200           fLeafCount = leafCount;
  201   
  202   
  203           // this is for Schema Mixed Content
  204           fMixed = mixed;
  205   
  206           //
  207           //  Ok, so lets grind through the building of the DFA. This method
  208           //  handles the high level logic of the algorithm, but it uses a
  209           //  number of helper classes to do its thing.
  210           //
  211           //  In order to avoid having hundreds of references to the error and
  212           //  string handlers around, this guy and all of his helper classes
  213           //  just throw a simple exception and we then pass it along.
  214           //
  215           buildDFA(syntaxTree);
  216       }
  217   
  218       //
  219       // ContentModelValidator methods
  220       //
  221   
  222       /**
  223        * Check that the specified content is valid according to this
  224        * content model. This method can also be called to do 'what if' 
  225        * testing of content models just to see if they would be valid.
  226        * <p>
  227        * A value of -1 in the children array indicates a PCDATA node. All other 
  228        * indexes will be positive and represent child elements. The count can be
  229        * zero, since some elements have the EMPTY content model and that must be 
  230        * confirmed.
  231        *
  232        * @param children The children of this element.  Each integer is an index within
  233        *                 the <code>StringPool</code> of the child element name.  An index
  234        *                 of -1 is used to indicate an occurrence of non-whitespace character
  235        *                 data.
  236        * @param offset Offset into the array where the children starts.
  237        * @param length The number of entries in the <code>children</code> array.
  238        *
  239        * @return The value -1 if fully valid, else the 0 based index of the child
  240        *         that first failed. If the value returned is equal to the number
  241        *         of children, then the specified children are valid but additional
  242        *         content is required to reach a valid ending state.
  243        *
  244        */
  245       public int validate(QName[] children, int offset, int length) {
  246   
  247           if (DEBUG_VALIDATE_CONTENT) 
  248               System.out.println("DFAContentModel#validateContent");
  249   
  250           //
  251           // A DFA content model must *always* have at least 1 child
  252           // so a failure is given if no children present.
  253           // 
  254           // Defect 782: This is an incorrect statement because a DFA
  255           // content model is also used for constructions such as:
  256           //
  257           //     (Optional*,NotRequired?)
  258           //
  259           // where a perfectly valid content would be NO CHILDREN.
  260           // Therefore, if there are no children, we must check to
  261           // see if the CMNODE_EOC marker is a valid start state! -Ac
  262           //
  263           if (length == 0) {
  264               if (DEBUG_VALIDATE_CONTENT) {
  265                   System.out.println("!!! no children");
  266                   System.out.println("elemMap="+fElemMap);
  267                   for (int i = 0; i < fElemMap.length; i++) {
  268                       String uri = fElemMap[i].uri;
  269                       String localpart = fElemMap[i].localpart;
  270                       
  271                       System.out.println("fElemMap["+i+"]="+uri+","+
  272                                          localpart+" ("+
  273                                          uri+", "+
  274                                          localpart+
  275                                          ')');
  276                                          
  277                   }
  278                   System.out.println("EOCIndex="+fEOCString);
  279               }
  280   
  281               return fEmptyContentIsValid ? -1 : 0;
  282   
  283           } // if child count == 0
  284   
  285           //
  286           //  Lets loop through the children in the array and move our way
  287           //  through the states. Note that we use the fElemMap array to map
  288           //  an element index to a state index.
  289           //
  290           int curState = 0;
  291           for (int childIndex = 0; childIndex < length; childIndex++)
  292           {
  293               // Get the current element index out
  294               final QName curElem = children[offset + childIndex];
  295               // ignore mixed text
  296               if (fMixed && curElem.localpart == null) {
  297                   continue;
  298               }
  299   
  300               // Look up this child in our element map
  301               int elemIndex = 0;
  302               for (; elemIndex < fElemMapSize; elemIndex++)
  303               {
  304                   int type = fElemMapType[elemIndex] & 0x0f ;
  305                   if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
  306                       //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
  307                       if (fElemMap[elemIndex].rawname == curElem.rawname) {
  308                           break;
  309                       }
  310                   }
  311                   else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
  312                       String uri = fElemMap[elemIndex].uri;
  313                       if (uri == null || uri == curElem.uri) {
  314                           break;
  315                       }
  316                   }
  317                   else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
  318                       if (curElem.uri == null) {
  319                           break;
  320                       }
  321                   }
  322                   else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
  323                       if (fElemMap[elemIndex].uri != curElem.uri) {
  324                           break;
  325                       }
  326                   }
  327               }
  328   
  329               // If we didn't find it, then obviously not valid
  330               if (elemIndex == fElemMapSize) {
  331                   if (DEBUG_VALIDATE_CONTENT) {
  332                       System.out.println("!!! didn't find it");
  333   
  334                       System.out.println("curElem : " +curElem );
  335                       for (int i=0; i<fElemMapSize; i++) {
  336                           System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
  337                           System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
  338                       }
  339                   }
  340   
  341                   return childIndex;
  342               }
  343   
  344               //
  345               //  Look up the next state for this input symbol when in the
  346               //  current state.
  347               //
  348               curState = fTransTable[curState][elemIndex];
  349   
  350               // If its not a legal transition, then invalid
  351               if (curState == -1) {
  352                   if (DEBUG_VALIDATE_CONTENT) 
  353                       System.out.println("!!! not a legal transition");
  354                   return childIndex;
  355               }
  356           }
  357   
  358           //
  359           //  We transitioned all the way through the input list. However, that
  360           //  does not mean that we ended in a final state. So check whether
  361           //  our ending state is a final state.
  362           //
  363           if (DEBUG_VALIDATE_CONTENT) 
  364               System.out.println("curState="+curState+", childCount="+length);
  365           if (!fFinalStateFlags[curState])
  366               return length;
  367   
  368           // success!
  369           return -1;
  370       } // validate
  371   
  372   
  373       //
  374       // Private methods
  375       //
  376   
  377       /** 
  378        * Builds the internal DFA transition table from the given syntax tree.
  379        *
  380        * @param syntaxTree The syntax tree.
  381        *
  382        * @exception CMException Thrown if DFA cannot be built.
  383        */
  384       private void buildDFA(CMNode syntaxTree) 
  385       {
  386           //
  387           //  The first step we need to take is to rewrite the content model
  388           //  using our CMNode objects, and in the process get rid of any
  389           //  repetition short cuts, converting them into '*' style repetitions
  390           //  or getting rid of repetitions altogether.
  391           //
  392           //  The conversions done are:
  393           //
  394           //  x+ -> (x|x*)
  395           //  x? -> (x|epsilon)
  396           //
  397           //  This is a relatively complex scenario. What is happening is that
  398           //  we create a top level binary node of which the special EOC value
  399           //  is set as the right side node. The the left side is set to the
  400           //  rewritten syntax tree. The source is the original content model
  401           //  info from the decl pool. The rewrite is done by buildSyntaxTree()
  402           //  which recurses the decl pool's content of the element and builds
  403           //  a new tree in the process.
  404           //
  405           //  Note that, during this operation, we set each non-epsilon leaf
  406           //  node's DFA state position and count the number of such leafs, which
  407           //  is left in the fLeafCount member.
  408           //
  409           //  The nodeTmp object is passed in just as a temp node to use during
  410           //  the recursion. Otherwise, we'd have to create a new node on every
  411           //  level of recursion, which would be piggy in Java (as is everything
  412           //  for that matter.)
  413           //
  414   
  415   	/* MODIFIED (Jan, 2001) 
  416   	 *
  417   	 * Use following rules.
  418   	 *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
  419   	 *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
  420   	 *
  421   	 * The same computation of follow as x* is applied to x+
  422   	 *
  423   	 * The modification drastically reduces computation time of
  424   	 * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
  425   	 */
  426   
  427           fQName.setValues(null, fEOCString, fEOCString, null);
  428           CMLeaf nodeEOC = new CMLeaf(fQName);
  429           fHeadNode = new CMBinOp
  430           (
  431               XMLContentSpec.CONTENTSPECNODE_SEQ
  432               , syntaxTree
  433               , nodeEOC
  434           );
  435   
  436           //
  437           //  And handle specially the EOC node, which also must be numbered
  438           //  and counted as a non-epsilon leaf node. It could not be handled
  439           //  in the above tree build because it was created before all that
  440           //  started. We save the EOC position since its used during the DFA
  441           //  building loop.
  442           //
  443           fEOCPos = fLeafCount;
  444           nodeEOC.setPosition(fLeafCount++);
  445   
  446           //
  447           //  Ok, so now we have to iterate the new tree and do a little more
  448           //  work now that we know the leaf count. One thing we need to do is
  449           //  to calculate the first and last position sets of each node. This
  450           //  is cached away in each of the nodes.
  451           //
  452           //  Along the way we also set the leaf count in each node as the
  453           //  maximum state count. They must know this in order to create their
  454           //  first/last pos sets.
  455           //
  456           //  We also need to build an array of references to the non-epsilon
  457           //  leaf nodes. Since we iterate it in the same way as before, this
  458           //  will put them in the array according to their position values.
  459           //
  460           fLeafList = new CMLeaf[fLeafCount];
  461           fLeafListType = new int[fLeafCount];
  462           postTreeBuildInit(fHeadNode, 0);
  463   
  464           //
  465           //  And, moving onward... We now need to build the follow position
  466           //  sets for all the nodes. So we allocate an array of state sets,
  467           //  one for each leaf node (i.e. each DFA position.)
  468           //
  469           fFollowList = new CMStateSet[fLeafCount];
  470           for (int index = 0; index < fLeafCount; index++)
  471               fFollowList[index] = new CMStateSet(fLeafCount);
  472           calcFollowList(fHeadNode);
  473           //
  474           //  And finally the big push... Now we build the DFA using all the
  475           //  states and the tree we've built up. First we set up the various
  476           //  data structures we are going to use while we do this.
  477           //
  478           //  First of all we need an array of unique element names in our
  479           //  content model. For each transition table entry, we need a set of
  480           //  contiguous indices to represent the transitions for a particular
  481           //  input element. So we need to a zero based range of indexes that
  482           //  map to element types. This element map provides that mapping.
  483           //
  484           fElemMap = new QName[fLeafCount];
  485           fElemMapType = new int[fLeafCount];
  486           fElemMapSize = 0;
  487           for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
  488           {
  489               fElemMap[outIndex] = new QName();
  490   
  491               /****
  492               if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
  493                   if (fLeafNameTypeVector == null) {
  494                       fLeafNameTypeVector = new ContentLeafNameTypeVector();
  495                   }
  496               }
  497               /****/
  498   
  499               // Get the current leaf's element index
  500               final QName element = fLeafList[outIndex].getElement();
  501   
  502               // See if the current leaf node's element index is in the list
  503               int inIndex = 0;
  504               for (; inIndex < fElemMapSize; inIndex++)
  505               {
  506                   if (fElemMap[inIndex].rawname == element.rawname) {
  507                       break;
  508                   }
  509               }
  510   
  511               // If it was not in the list, then add it, if not the EOC node
  512               if (inIndex == fElemMapSize) {
  513                   fElemMap[fElemMapSize].setValues(element);
  514                   fElemMapType[fElemMapSize] = fLeafListType[outIndex];
  515                   fElemMapSize++;
  516               }
  517           }
  518           // set up the fLeafNameTypeVector object if there is one.
  519           /*****
  520           if (fLeafNameTypeVector != null) {
  521               fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
  522           }
  523   
  524   	/*** 
  525   	* Optimization(Jan, 2001); We sort fLeafList according to 
  526   	* elemIndex which is *uniquely* associated to each leaf.  
  527   	* We are *assuming* that each element appears in at least one leaf.
  528   	**/
  529   
  530   	int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
  531   	int fSortCount = 0;
  532   
  533   	for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
  534   	    for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
  535   		    final QName leaf = fLeafList[leafIndex].getElement();
  536   		    final QName element = fElemMap[elemIndex];
  537   		    if (leaf.rawname == element.rawname) {
  538   			    fLeafSorter[fSortCount++] = leafIndex;
  539   		    }
  540   	    }
  541   	    fLeafSorter[fSortCount++] = -1;
  542   	}
  543   
  544   	/* Optimization(Jan, 2001) */
  545   
  546           //
  547           //  Next lets create some arrays, some that that hold transient
  548           //  information during the DFA build and some that are permament.
  549           //  These are kind of sticky since we cannot know how big they will
  550           //  get, but we don't want to use any Java collections because of
  551           //  performance.
  552           //
  553           //  Basically they will probably be about fLeafCount*2 on average,
  554           //  but can be as large as 2^(fLeafCount*2), worst case. So we start
  555           //  with fLeafCount*4 as a middle ground. This will be very unlikely
  556           //  to ever have to expand, though it if does, the overhead will be
  557           //  somewhat ugly.
  558           //
  559           int curArraySize = fLeafCount * 4;
  560           CMStateSet[] statesToDo = new CMStateSet[curArraySize];
  561           fFinalStateFlags = new boolean[curArraySize];
  562           fTransTable = new int[curArraySize][];
  563   
  564           //
  565           //  Ok we start with the initial set as the first pos set of the
  566           //  head node (which is the seq node that holds the content model
  567           //  and the EOC node.)
  568           //
  569           CMStateSet setT = fHeadNode.firstPos();
  570   
  571           //
  572           //  Init our two state flags. Basically the unmarked state counter
  573           //  is always chasing the current state counter. When it catches up,
  574           //  that means we made a pass through that did not add any new states
  575           //  to the lists, at which time we are done. We could have used a
  576           //  expanding array of flags which we used to mark off states as we
  577           //  complete them, but this is easier though less readable maybe.
  578           //
  579           int unmarkedState = 0;
  580           int curState = 0;
  581   
  582           //
  583           //  Init the first transition table entry, and put the initial state
  584           //  into the states to do list, then bump the current state.
  585           //
  586           fTransTable[curState] = makeDefStateList();
  587           statesToDo[curState] = setT;
  588           curState++;
  589   
  590   	    /* Optimization(Jan, 2001); This is faster for
  591   	     * a large content model such as, "(t001+|t002+|.... |t500+)".
  592   	     */
  593   
  594           HashMap stateTable = new HashMap();
  595   
  596   	    /* Optimization(Jan, 2001) */
  597   
  598           //
  599           //  Ok, almost done with the algorithm... We now enter the
  600           //  loop where we go until the states done counter catches up with
  601           //  the states to do counter.
  602           //
  603           while (unmarkedState < curState)
  604           {
  605               //
  606               //  Get the first unmarked state out of the list of states to do.
  607               //  And get the associated transition table entry.
  608               //
  609               setT = statesToDo[unmarkedState];
  610               int[] transEntry = fTransTable[unmarkedState];
  611   
  612               // Mark this one final if it contains the EOC state
  613               fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
  614   
  615               // Bump up the unmarked state count, marking this state done
  616               unmarkedState++;
  617   
  618               // Loop through each possible input symbol in the element map
  619               CMStateSet newSet = null;
  620   	    /* Optimization(Jan, 2001) */
  621               int sorterIndex = 0;
  622   	    /* Optimization(Jan, 2001) */
  623               for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
  624               {
  625                   //
  626                   //  Build up a set of states which is the union of all of
  627                   //  the follow sets of DFA positions that are in the current
  628                   //  state. If we gave away the new set last time through then
  629                   //  create a new one. Otherwise, zero out the existing one.
  630                   //
  631                   if (newSet == null)
  632                       newSet = new CMStateSet(fLeafCount);
  633                   else
  634                       newSet.zeroBits();
  635   
  636   	    /* Optimization(Jan, 2001) */
  637                   int leafIndex = fLeafSorter[sorterIndex++];
  638   
  639                   while (leafIndex != -1) {
  640   	        // If this leaf index (DFA position) is in the current set...
  641                       if (setT.getBit(leafIndex))
  642                       {
  643                           //
  644                           //  If this leaf is the current input symbol, then we
  645                           //  want to add its follow list to the set of states to
  646                           //  transition to from the current state.
  647                           //
  648                                   newSet.union(fFollowList[leafIndex]);
  649                               }
  650   
  651                      leafIndex = fLeafSorter[sorterIndex++];
  652   	}
  653   	    /* Optimization(Jan, 2001) */
  654   
  655                   //
  656                   //  If this new set is not empty, then see if its in the list
  657                   //  of states to do. If not, then add it.
  658                   //
  659                   if (!newSet.isEmpty())
  660                   {
  661                       //
  662                       //  Search the 'states to do' list to see if this new
  663                       //  state set is already in there.
  664                       //
  665   
  666   	    /* Optimization(Jan, 2001) */
  667   	    Integer stateObj = (Integer)stateTable.get(newSet);
  668   	    int stateIndex = (stateObj == null ? curState : stateObj.intValue());
  669   	    /* Optimization(Jan, 2001) */
  670   
  671                       // If we did not find it, then add it
  672                       if (stateIndex == curState)
  673                       {
  674                           //
  675                           //  Put this new state into the states to do and init
  676                           //  a new entry at the same index in the transition
  677                           //  table.
  678                           //
  679                           statesToDo[curState] = newSet;
  680                           fTransTable[curState] = makeDefStateList();
  681   
  682   	    /* Optimization(Jan, 2001) */
  683                           stateTable.put(newSet, new Integer(curState));
  684   	    /* Optimization(Jan, 2001) */
  685   
  686                           // We now have a new state to do so bump the count
  687                           curState++;
  688   
  689                           //
  690                           //  Null out the new set to indicate we adopted it.
  691                           //  This will cause the creation of a new set on the
  692                           //  next time around the loop.
  693                           //
  694                           newSet = null;
  695                       }
  696   
  697                       //
  698                       //  Now set this state in the transition table's entry
  699                       //  for this element (using its index), with the DFA
  700                       //  state we will move to from the current state when we
  701                       //  see this input element.
  702                       //
  703                       transEntry[elemIndex] = stateIndex;
  704   
  705                       // Expand the arrays if we're full
  706                       if (curState == curArraySize)
  707                       {
  708                           //
  709                           //  Yikes, we overflowed the initial array size, so
  710                           //  we've got to expand all of these arrays. So adjust
  711                           //  up the size by 50% and allocate new arrays.
  712                           //
  713                           final int newSize = (int)(curArraySize * 1.5);
  714                           CMStateSet[] newToDo = new CMStateSet[newSize];
  715                           boolean[] newFinalFlags = new boolean[newSize];
  716                           int[][] newTransTable = new int[newSize][];
  717   
  718                           // Copy over all of the existing content
  719                           System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
  720                           System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
  721                           System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);
  722   
  723                           // Store the new array size
  724                           curArraySize = newSize;
  725                           statesToDo = newToDo;
  726                           fFinalStateFlags = newFinalFlags;
  727                           fTransTable = newTransTable;
  728                       }
  729                   }
  730               }
  731           }
  732   
  733           // Check to see if we can set the fEmptyContentIsValid flag.
  734           fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
  735   
  736           //
  737           //  And now we can say bye bye to the temp representation since we've
  738           //  built the DFA.
  739           //
  740           if (DEBUG_VALIDATE_CONTENT) 
  741               dumpTree(fHeadNode, 0);
  742           fHeadNode = null;
  743           fLeafList = null;
  744           fFollowList = null;
  745   
  746       }
  747   
  748       /**
  749        * Calculates the follow list of the current node.
  750        *
  751        * @param nodeCur The curent node.
  752        *
  753        * @exception CMException Thrown if follow list cannot be calculated.
  754        */
  755       private void calcFollowList(CMNode nodeCur) 
  756       {
  757           // Recurse as required
  758           if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
  759           {
  760               // Recurse only
  761               calcFollowList(((CMBinOp)nodeCur).getLeft());
  762               calcFollowList(((CMBinOp)nodeCur).getRight());
  763           }
  764            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
  765           {
  766               // Recurse first
  767               calcFollowList(((CMBinOp)nodeCur).getLeft());
  768               calcFollowList(((CMBinOp)nodeCur).getRight());
  769   
  770               //
  771               //  Now handle our level. We use our left child's last pos
  772               //  set and our right child's first pos set, so go ahead and
  773               //  get them ahead of time.
  774               //
  775               final CMStateSet last  = ((CMBinOp)nodeCur).getLeft().lastPos();
  776               final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
  777   
  778               //
  779               //  Now, for every position which is in our left child's last set
  780               //  add all of the states in our right child's first set to the
  781               //  follow set for that position.
  782               //
  783               for (int index = 0; index < fLeafCount; index++)
  784               {
  785                   if (last.getBit(index))
  786                       fFollowList[index].union(first);
  787               }
  788           }
  789            /***
  790            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
  791           {
  792               // Recurse first
  793               calcFollowList(((CMUniOp)nodeCur).getChild());
  794   
  795               //
  796               //  Now handle our level. We use our own first and last position
  797               //  sets, so get them up front.
  798               //
  799               final CMStateSet first = nodeCur.firstPos();
  800               final CMStateSet last  = nodeCur.lastPos();
  801   
  802               //
  803               //  For every position which is in our last position set, add all
  804               //  of our first position states to the follow set for that
  805               //  position.
  806               //
  807               for (int index = 0; index < fLeafCount; index++)
  808               {
  809                   if (last.getBit(index))
  810                       fFollowList[index].union(first);
  811               }
  812           }
  813            else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
  814                 ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
  815           {
  816               throw new RuntimeException("ImplementationMessages.VAL_NIICM");
  817           }
  818           /***/
  819            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
  820   	    || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
  821           {
  822               // Recurse first
  823               calcFollowList(((CMUniOp)nodeCur).getChild());
  824   
  825               //
  826               //  Now handle our level. We use our own first and last position
  827               //  sets, so get them up front.
  828               //
  829               final CMStateSet first = nodeCur.firstPos();
  830               final CMStateSet last  = nodeCur.lastPos();
  831   
  832               //
  833               //  For every position which is in our last position set, add all
  834               //  of our first position states to the follow set for that
  835               //  position.
  836               //
  837               for (int index = 0; index < fLeafCount; index++)
  838               {
  839                   if (last.getBit(index))
  840                       fFollowList[index].union(first);
  841               }
  842           }
  843          
  844           else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
  845               // Recurse only
  846               calcFollowList(((CMUniOp)nodeCur).getChild());
  847           }
  848            /***/
  849       }
  850   
  851       /**
  852        * Dumps the tree of the current node to standard output.
  853        *
  854        * @param nodeCur The current node.
  855        * @param level   The maximum levels to output.
  856        *
  857        * @exception CMException Thrown on error.
  858        */
  859       private void dumpTree(CMNode nodeCur, int level) 
  860       {
  861           for (int index = 0; index < level; index++)
  862               System.out.print("   ");
  863   
  864           int type = nodeCur.type();
  865           if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
  866           ||  (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
  867           {
  868               if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
  869                   System.out.print("Choice Node ");
  870               else
  871                   System.out.print("Seq Node ");
  872   
  873               if (nodeCur.isNullable())
  874                   System.out.print("Nullable ");
  875   
  876               System.out.print("firstPos=");
  877               System.out.print(nodeCur.firstPos().toString());
  878               System.out.print(" lastPos=");
  879               System.out.println(nodeCur.lastPos().toString());
  880   
  881               dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
  882               dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
  883           }
  884            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
  885           {
  886               System.out.print("Rep Node ");
  887   
  888               if (nodeCur.isNullable())
  889                   System.out.print("Nullable ");
  890   
  891               System.out.print("firstPos=");
  892               System.out.print(nodeCur.firstPos().toString());
  893               System.out.print(" lastPos=");
  894               System.out.println(nodeCur.lastPos().toString());
  895   
  896               dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
  897           }
  898            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
  899           {
  900               System.out.print
  901               (
  902                   "Leaf: (pos="
  903                   + ((CMLeaf)nodeCur).getPosition()
  904                   + "), "
  905                   + ((CMLeaf)nodeCur).getElement()
  906                   + "(elemIndex="
  907                   + ((CMLeaf)nodeCur).getElement()
  908                   + ") "
  909               );
  910   
  911               if (nodeCur.isNullable())
  912                   System.out.print(" Nullable ");
  913   
  914               System.out.print("firstPos=");
  915               System.out.print(nodeCur.firstPos().toString());
  916               System.out.print(" lastPos=");
  917               System.out.println(nodeCur.lastPos().toString());
  918           }
  919            else
  920           {
  921               throw new RuntimeException("ImplementationMessages.VAL_NIICM");
  922           }
  923       }
  924   
  925   
  926       /**
  927        * -1 is used to represent bad transitions in the transition table
  928        * entry for each state. So each entry is initialized to an all -1
  929        * array. This method creates a new entry and initializes it.
  930        */
  931       private int[] makeDefStateList()
  932       {
  933           int[] retArray = new int[fElemMapSize];
  934           for (int index = 0; index < fElemMapSize; index++)
  935               retArray[index] = -1;
  936           return retArray;
  937       }
  938   
  939       /** Post tree build initialization. */
  940       private int postTreeBuildInit(CMNode nodeCur, int curIndex) 
  941       {
  942           // Set the maximum states on this node
  943           nodeCur.setMaxStates(fLeafCount);
  944   
  945           // Recurse as required
  946           if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
  947               (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
  948               (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
  949               // REVISIT: Don't waste these structures.
  950               QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
  951               fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
  952               fLeafListType[curIndex] = nodeCur.type();
  953               curIndex++;
  954           }
  955           else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
  956           ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
  957           {
  958               curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
  959               curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
  960           }
  961            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
  962   	     || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
  963   	     || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
  964           {
  965               curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
  966           }
  967            else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
  968           {
  969               //
  970               //  Put this node in the leaf list at the current index if its
  971               //  a non-epsilon leaf.
  972               //
  973                final QName node = ((CMLeaf)nodeCur).getElement();
  974               if (node.localpart != fEpsilonString) {
  975                   fLeafList[curIndex] = (CMLeaf)nodeCur;
  976                   fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
  977                   curIndex++;
  978               }
  979           }
  980            else
  981           {
  982               throw new RuntimeException("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
  983           }
  984           return curIndex;
  985       }
  986   
  987   } // class DFAContentModel

Save This Page
Home » Xerces-J-src.2.9.1 » org.apache.xerces » impl » dtd » models » [javadoc | source]