Source code: com/jcorporate/expresso/ext/regexp/REProgram.java
1 /* ====================================================================
2 * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3 *
4 * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. The end-user documentation included with the redistribution,
19 * if any, must include the following acknowledgment:
20 * "This product includes software developed by Jcorporate Ltd.
21 * (http://www.jcorporate.com/)."
22 * Alternately, this acknowledgment may appear in the software itself,
23 * if and wherever such third-party acknowledgments normally appear.
24 *
25 * 4. "Jcorporate" and product names such as "Expresso" must
26 * not be used to endorse or promote products derived from this
27 * software without prior written permission. For written permission,
28 * please contact info@jcorporate.com.
29 *
30 * 5. Products derived from this software may not be called "Expresso",
31 * or other Jcorporate product names; nor may "Expresso" or other
32 * Jcorporate product names appear in their name, without prior
33 * written permission of Jcorporate Ltd.
34 *
35 * 6. No product derived from this software may compete in the same
36 * market space, i.e. framework, without prior written permission
37 * of Jcorporate Ltd. For written permission, please contact
38 * partners@jcorporate.com.
39 *
40 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This software consists of voluntary contributions made by many
55 * individuals on behalf of the Jcorporate Ltd. Contributions back
56 * to the project(s) are encouraged when you make modifications.
57 * Please send them to support@jcorporate.com. For more information
58 * on Jcorporate Ltd. and its products, please see
59 * <http://www.jcorporate.com/>.
60 *
61 * Portions of this software are based upon other open source
62 * products and are subject to their respective licenses.
63 */
64
65 package com.jcorporate.expresso.ext.regexp;
66
67 /*
68 * ====================================================================
69 *
70 * The Apache Software License, Version 1.1
71 *
72 * Copyright (c) 1999 The Apache Software Foundation. All rights
73 * reserved.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 *
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 *
82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in
84 * the documentation and/or other materials provided with the
85 * distribution.
86 *
87 * 3. The end-user documentation included with the redistribution, if
88 * any, must include the following acknowlegement:
89 * "This product includes software developed by the
90 * Apache Software Foundation (http://www.apache.org/)."
91 * Alternately, this acknowlegement may appear in the software itself,
92 * if and wherever such third-party acknowlegements normally appear.
93 *
94 * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
95 * Foundation" must not be used to endorse or promote products derived
96 * from this software without prior written permission. For written
97 * permission, please contact apache@apache.org.
98 *
99 * 5. Products derived from this software may not be called "Apache"
100 * nor may "Apache" appear in their names without prior written
101 * permission of the Apache Group.
102 *
103 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
104 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
105 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
106 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
107 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
108 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
109 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
110 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
111 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
112 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
113 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
114 * SUCH DAMAGE.
115 * ====================================================================
116 *
117 * This software consists of voluntary contributions made by many
118 * individuals on behalf of the Apache Software Foundation. For more
119 * information on the Apache Software Foundation, please see
120 * <http://www.apache.org/>.
121 *
122 */
123
124
125 /**
126 * A class that holds compiled regular expressions. This is exposed mainly
127 * for use by the recompile utility (which helps you produce precompiled
128 * REProgram objects). You should not otherwise need to work directly with
129 * this class.
130 *
131 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
132 * @version $Id: REProgram.java,v 1.7 2004/11/17 20:48:15 lhamel Exp $
133 * @see RE
134 * @see RECompiler
135 * @deprecated since v5.6, use jakarta oro
136 */
137 public class REProgram {
138 static final int OPT_HASBACKREFS = 1;
139 char[] instruction; // The compiled regular expression 'program'
140 int lenInstruction; // The amount of the instruction buffer in use
141 char[] prefix; // Prefix string optimization
142 int flags; // Optimization flags (REProgram.OPT_*)
143
144 /**
145 * Constructs a program object from a character array
146 *
147 * @param instruction Character array with RE opcode instructions in it
148 */
149 public REProgram(char[] instruction) {
150 this(instruction, instruction.length);
151 }
152
153 /**
154 * Constructs a program object from a character array
155 *
156 * @param instruction Character array with RE opcode instructions in it
157 * @param lenInstruction Amount of instruction array in use
158 */
159 public REProgram(char[] instruction, int lenInstruction) {
160 setInstructions(instruction, lenInstruction);
161 }
162
163 /**
164 * Returns a copy of the current regular expression program in a character
165 * array that is exactly the right length to hold the program. If there is
166 * no program compiled yet, getInstructions() will return null.
167 *
168 * @return A copy of the current compiled RE program
169 */
170 public char[] getInstructions() {
171
172 // Ensure program has been compiled!
173 if (lenInstruction != 0) {
174
175 // Return copy of program
176 char[] ret = new char[lenInstruction];
177 System.arraycopy(instruction, 0, ret, 0, lenInstruction);
178
179 return ret;
180 }
181
182 return null;
183 }
184
185 /**
186 * Sets a new regular expression program to run. It is this method which
187 * performs any special compile-time search optimizations. Currently only
188 * two optimizations are in place - one which checks for backreferences
189 * (so that they can be lazily allocated) and another which attempts to
190 * find an prefix anchor string so that substantial amounts of input can
191 * potentially be skipped without running the actual program.
192 *
193 * @param instruction Program instruction buffer
194 * @param lenInstruction Length of instruction buffer in use
195 */
196 public void setInstructions(char[] instruction, int lenInstruction) {
197
198 // Save reference to instruction array
199 this.instruction = instruction;
200 this.lenInstruction = lenInstruction;
201
202 // Initialize other program-related variables
203 flags = 0;
204 prefix = null;
205
206 // Try various compile-time optimizations if there's a program
207 if (instruction != null && lenInstruction != 0) {
208
209 // If the first node is a branch
210 if (lenInstruction >= RE.nodeSize &&
211 instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) {
212
213 // to the end node
214 int next = instruction[0 + RE.offsetNext];
215
216 if (instruction[next + RE.offsetOpcode] == RE.OP_END) {
217
218 // and the branch starts with an atom
219 if (lenInstruction >= (RE.nodeSize * 2) &&
220 instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) {
221
222 // then get that atom as an prefix because there's no other choice
223 int lenAtom = instruction[RE.nodeSize +
224 RE.offsetOpdata];
225 prefix = new char[lenAtom];
226 System.arraycopy(instruction, RE.nodeSize * 2, prefix,
227 0, lenAtom);
228 }
229 }
230 }
231 BackrefScanLoop:
232
233 // Check for backreferences
234 for (int i = 0; i < lenInstruction; i += RE.nodeSize) {
235 switch (instruction[i + RE.offsetOpcode]) {
236 case RE.OP_ANYOF:
237 i += (instruction[i + RE.offsetOpdata] * 2);
238 break;
239
240 case RE.OP_ATOM:
241 i += instruction[i + RE.offsetOpdata];
242 break;
243
244 case RE.OP_BACKREF:
245 flags |= OPT_HASBACKREFS;
246 break BackrefScanLoop;
247 }
248 }
249 }
250 }
251 }