Save This Page
Home » jakarta-regexp-1.5 » org.apache » regexp » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.regexp;
   19   
   20   import java.io.Serializable;
   21   
   22   /**
   23    * A class that holds compiled regular expressions.  This is exposed mainly
   24    * for use by the recompile utility (which helps you produce precompiled
   25    * REProgram objects). You should not otherwise need to work directly with
   26    * this class.
   27    *
   28    * @see RE
   29    * @see RECompiler
   30    *
   31    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   32    * @version $Id: REProgram.java 518156 2007-03-14 14:31:26Z vgritsenko $
   33    */
   34   public class REProgram implements Serializable
   35   {
   36       static final int OPT_HASBACKREFS = 1;
   37       static final int OPT_HASBOL      = 2;
   38   
   39       char[] instruction;         // The compiled regular expression 'program'
   40       int lenInstruction;         // The amount of the instruction buffer in use
   41       char[] prefix;              // Prefix string optimization
   42       int flags;                  // Optimization flags (REProgram.OPT_*)
   43       int maxParens = -1;
   44   
   45       /**
   46        * Constructs a program object from a character array
   47        * @param instruction Character array with RE opcode instructions in it
   48        */
   49       public REProgram(char[] instruction)
   50       {
   51           this(instruction, instruction.length);
   52       }
   53   
   54       /**
   55        * Constructs a program object from a character array
   56        * @param parens Count of parens in the program
   57        * @param instruction Character array with RE opcode instructions in it
   58        */
   59       public REProgram(int parens, char[] instruction)
   60       {
   61           this(instruction, instruction.length);
   62           this.maxParens = parens;
   63       }
   64   
   65       /**
   66        * Constructs a program object from a character array
   67        * @param instruction Character array with RE opcode instructions in it
   68        * @param lenInstruction Amount of instruction array in use
   69        */
   70       public REProgram(char[] instruction, int lenInstruction)
   71       {
   72           setInstructions(instruction, lenInstruction);
   73       }
   74   
   75       /**
   76        * Returns a copy of the current regular expression program in a character
   77        * array that is exactly the right length to hold the program.  If there is
   78        * no program compiled yet, getInstructions() will return null.
   79        * @return A copy of the current compiled RE program
   80        */
   81       public char[] getInstructions()
   82       {
   83           // Ensure program has been compiled!
   84           if (lenInstruction != 0)
   85           {
   86               // Return copy of program
   87               char[] ret = new char[lenInstruction];
   88               System.arraycopy(instruction, 0, ret, 0, lenInstruction);
   89               return ret;
   90           }
   91           return null;
   92       }
   93   
   94       /**
   95        * Sets a new regular expression program to run.  It is this method which
   96        * performs any special compile-time search optimizations.  Currently only
   97        * two optimizations are in place - one which checks for backreferences
   98        * (so that they can be lazily allocated) and another which attempts to
   99        * find an prefix anchor string so that substantial amounts of input can
  100        * potentially be skipped without running the actual program.
  101        * @param instruction Program instruction buffer
  102        * @param lenInstruction Length of instruction buffer in use
  103        */
  104       public void setInstructions(char[] instruction, int lenInstruction)
  105       {
  106           // Save reference to instruction array
  107           this.instruction = instruction;
  108           this.lenInstruction = lenInstruction;
  109   
  110           // Initialize other program-related variables
  111           this.flags = 0;
  112           this.prefix = null;
  113   
  114           // Try various compile-time optimizations if there's a program
  115           if (instruction != null && lenInstruction != 0)
  116           {
  117               // If the first node is a branch
  118               if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
  119               {
  120                   // to the end node
  121                   int next = (short) instruction[0 + RE.offsetNext];
  122                   if (instruction[next + RE.offsetOpcode] == RE.OP_END && lenInstruction >= (RE.nodeSize * 2))
  123                   {
  124                       final char nextOp = instruction[RE.nodeSize + RE.offsetOpcode];
  125                       // the branch starts with an atom
  126                       if (nextOp == RE.OP_ATOM)
  127                       {
  128                           // then get that atom as an prefix because there's no other choice
  129                           int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
  130                           this.prefix = new char[lenAtom];
  131                           System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
  132                       }
  133                       // the branch starts with a BOL
  134                       else if (nextOp == RE.OP_BOL)
  135                       {
  136                           // then set the flag indicating that BOL is present
  137                           this.flags |= OPT_HASBOL;
  138                       }
  139                   }
  140               }
  141   
  142               BackrefScanLoop:
  143   
  144               // Check for backreferences
  145               for (int i = 0; i < lenInstruction; i += RE.nodeSize)
  146               {
  147                   switch (instruction[i + RE.offsetOpcode])
  148                   {
  149                       case RE.OP_ANYOF:
  150                           i += (instruction[i + RE.offsetOpdata] * 2);
  151                           break;
  152   
  153                       case RE.OP_ATOM:
  154                           i += instruction[i + RE.offsetOpdata];
  155                           break;
  156   
  157                       case RE.OP_BACKREF:
  158                           flags |= OPT_HASBACKREFS;
  159                           break BackrefScanLoop;
  160                   }
  161               }
  162           }
  163       }
  164   
  165       /**
  166        * Returns a copy of the prefix of current regular expression program
  167        * in a character array.  If there is no prefix, or there is no program
  168        * compiled yet, <code>getPrefix</code> will return null.
  169        * @return A copy of the prefix of current compiled RE program
  170        */
  171       public char[] getPrefix()
  172       {
  173           if (prefix != null)
  174           {
  175               // Return copy of prefix
  176               char[] ret = new char[prefix.length];
  177               System.arraycopy(prefix, 0, ret, 0, prefix.length);
  178               return ret;
  179           }
  180           return null;
  181       }
  182   }

Save This Page
Home » jakarta-regexp-1.5 » org.apache » regexp » [javadoc | source]