Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/jcorporate/expresso/ext/regexp/REProgram.java


1   /* ====================================================================
2    * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3    *
4    * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5    *
6    * Redistribution and use in source and binary forms, with or without
7    * modification, are permitted provided that the following conditions
8    * are met:
9    *
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice, this list of conditions and the following disclaimer.
12   *
13   * 2. Redistributions in binary form must reproduce the above copyright
14   *    notice, this list of conditions and the following disclaimer in
15   *    the documentation and/or other materials provided with the
16   *    distribution.
17   *
18   * 3. The end-user documentation included with the redistribution,
19   *    if any, must include the following acknowledgment:
20   *       "This product includes software developed by Jcorporate Ltd.
21   *        (http://www.jcorporate.com/)."
22   *    Alternately, this acknowledgment may appear in the software itself,
23   *    if and wherever such third-party acknowledgments normally appear.
24   *
25   * 4. "Jcorporate" and product names such as "Expresso" must
26   *    not be used to endorse or promote products derived from this
27   *    software without prior written permission. For written permission,
28   *    please contact info@jcorporate.com.
29   *
30   * 5. Products derived from this software may not be called "Expresso",
31   *    or other Jcorporate product names; nor may "Expresso" or other
32   *    Jcorporate product names appear in their name, without prior
33   *    written permission of Jcorporate Ltd.
34   *
35   * 6. No product derived from this software may compete in the same
36   *    market space, i.e. framework, without prior written permission
37   *    of Jcorporate Ltd. For written permission, please contact
38   *    partners@jcorporate.com.
39   *
40   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43   * DISCLAIMED.  IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46   * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51   * SUCH DAMAGE.
52   * ====================================================================
53   *
54   * This software consists of voluntary contributions made by many
55   * individuals on behalf of the Jcorporate Ltd. Contributions back
56   * to the project(s) are encouraged when you make modifications.
57   * Please send them to support@jcorporate.com. For more information
58   * on Jcorporate Ltd. and its products, please see
59   * <http://www.jcorporate.com/>.
60   *
61   * Portions of this software are based upon other open source
62   * products and are subject to their respective licenses.
63   */
64  
65  package com.jcorporate.expresso.ext.regexp;
66  
67  /*
68   * ====================================================================
69   *
70   * The Apache Software License, Version 1.1
71   *
72   * Copyright (c) 1999 The Apache Software Foundation.  All rights
73   * reserved.
74   *
75   * Redistribution and use in source and binary forms, with or without
76   * modification, are permitted provided that the following conditions
77   * are met:
78   *
79   * 1. Redistributions of source code must retain the above copyright
80   *    notice, this list of conditions and the following disclaimer.
81   *
82   * 2. Redistributions in binary form must reproduce the above copyright
83   *    notice, this list of conditions and the following disclaimer in
84   *    the documentation and/or other materials provided with the
85   *    distribution.
86   *
87   * 3. The end-user documentation included with the redistribution, if
88   *    any, must include the following acknowlegement:
89   *       "This product includes software developed by the
90   *        Apache Software Foundation (http://www.apache.org/)."
91   *    Alternately, this acknowlegement may appear in the software itself,
92   *    if and wherever such third-party acknowlegements normally appear.
93   *
94   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
95   *    Foundation" must not be used to endorse or promote products derived
96   *    from this software without prior written permission. For written
97   *    permission, please contact apache@apache.org.
98   *
99   * 5. Products derived from this software may not be called "Apache"
100  *    nor may "Apache" appear in their names without prior written
101  *    permission of the Apache Group.
102  *
103  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
104  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
105  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
106  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
107  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
108  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
109  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
110  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
111  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
112  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
113  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
114  * SUCH DAMAGE.
115  * ====================================================================
116  *
117  * This software consists of voluntary contributions made by many
118  * individuals on behalf of the Apache Software Foundation.  For more
119  * information on the Apache Software Foundation, please see
120  * <http://www.apache.org/>.
121  *
122  */
123 
124 
125 /**
126  * A class that holds compiled regular expressions.  This is exposed mainly
127  * for use by the recompile utility (which helps you produce precompiled
128  * REProgram objects). You should not otherwise need to work directly with
129  * this class.
130  *
131  * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
132  * @version $Id: REProgram.java,v 1.7 2004/11/17 20:48:15 lhamel Exp $
133  * @see RE
134  * @see RECompiler
135  * @deprecated since v5.6, use jakarta oro
136  */
137 public class REProgram {
138     static final int OPT_HASBACKREFS = 1;
139     char[] instruction; // The compiled regular expression 'program'
140     int lenInstruction; // The amount of the instruction buffer in use
141     char[] prefix; // Prefix string optimization
142     int flags; // Optimization flags (REProgram.OPT_*)
143 
144     /**
145      * Constructs a program object from a character array
146      *
147      * @param instruction Character array with RE opcode instructions in it
148      */
149     public REProgram(char[] instruction) {
150         this(instruction, instruction.length);
151     }
152 
153     /**
154      * Constructs a program object from a character array
155      *
156      * @param instruction    Character array with RE opcode instructions in it
157      * @param lenInstruction Amount of instruction array in use
158      */
159     public REProgram(char[] instruction, int lenInstruction) {
160         setInstructions(instruction, lenInstruction);
161     }
162 
163     /**
164      * Returns a copy of the current regular expression program in a character
165      * array that is exactly the right length to hold the program.  If there is
166      * no program compiled yet, getInstructions() will return null.
167      *
168      * @return A copy of the current compiled RE program
169      */
170     public char[] getInstructions() {
171 
172         // Ensure program has been compiled!
173         if (lenInstruction != 0) {
174 
175             // Return copy of program
176             char[] ret = new char[lenInstruction];
177             System.arraycopy(instruction, 0, ret, 0, lenInstruction);
178 
179             return ret;
180         }
181 
182         return null;
183     }
184 
185     /**
186      * Sets a new regular expression program to run.  It is this method which
187      * performs any special compile-time search optimizations.  Currently only
188      * two optimizations are in place - one which checks for backreferences
189      * (so that they can be lazily allocated) and another which attempts to
190      * find an prefix anchor string so that substantial amounts of input can
191      * potentially be skipped without running the actual program.
192      *
193      * @param instruction    Program instruction buffer
194      * @param lenInstruction Length of instruction buffer in use
195      */
196     public void setInstructions(char[] instruction, int lenInstruction) {
197 
198         // Save reference to instruction array
199         this.instruction = instruction;
200         this.lenInstruction = lenInstruction;
201 
202         // Initialize other program-related variables
203         flags = 0;
204         prefix = null;
205 
206         // Try various compile-time optimizations if there's a program
207         if (instruction != null && lenInstruction != 0) {
208 
209             // If the first node is a branch
210             if (lenInstruction >= RE.nodeSize &&
211                     instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) {
212 
213                 // to the end node
214                 int next = instruction[0 + RE.offsetNext];
215 
216                 if (instruction[next + RE.offsetOpcode] == RE.OP_END) {
217 
218                     // and the branch starts with an atom
219                     if (lenInstruction >= (RE.nodeSize * 2) &&
220                             instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) {
221 
222                         // then get that atom as an prefix because there's no other choice
223                         int lenAtom = instruction[RE.nodeSize +
224                                 RE.offsetOpdata];
225                         prefix = new char[lenAtom];
226                         System.arraycopy(instruction, RE.nodeSize * 2, prefix,
227                                 0, lenAtom);
228                     }
229                 }
230             }
231             BackrefScanLoop:
232 
233                         // Check for backreferences
234                         for (int i = 0; i < lenInstruction; i += RE.nodeSize) {
235                             switch (instruction[i + RE.offsetOpcode]) {
236                                 case RE.OP_ANYOF:
237                                     i += (instruction[i + RE.offsetOpdata] * 2);
238                                     break;
239 
240                                 case RE.OP_ATOM:
241                                     i += instruction[i + RE.offsetOpdata];
242                                     break;
243 
244                                 case RE.OP_BACKREF:
245                                     flags |= OPT_HASBACKREFS;
246                                     break BackrefScanLoop;
247                             }
248                         }
249         }
250     }
251 }