Source code: org/apache/oro/text/regex/Perl5Debug.java
1 /*
2 * $Id: Perl5Debug.java,v 1.11 2003/11/07 20:16:25 dfs Exp $
3 *
4 * ====================================================================
5 * The Apache Software License, Version 1.1
6 *
7 * Copyright (c) 2000 The Apache Software Foundation. All rights
8 * reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 *
22 * 3. The end-user documentation included with the redistribution,
23 * if any, must include the following acknowledgment:
24 * "This product includes software developed by the
25 * Apache Software Foundation (http://www.apache.org/)."
26 * Alternately, this acknowledgment may appear in the software itself,
27 * if and wherever such third-party acknowledgments normally appear.
28 *
29 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
30 * must not be used to endorse or promote products derived from this
31 * software without prior written permission. For written
32 * permission, please contact apache@apache.org.
33 *
34 * 5. Products derived from this software may not be called "Apache"
35 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
36 * name, without prior written permission of the Apache Software Foundation.
37 *
38 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49 * SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This software consists of voluntary contributions made by many
53 * individuals on behalf of the Apache Software Foundation. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58
59 package org.apache.oro.text.regex;
60
61
62 /**
63 * The Perl5Debug class is not intended for general use and should not
64 * be instantiated, but is provided because some users may find the output
65 * of its single method to be useful.
66 * The Perl5Compiler class generates a representation of a
67 * regular expression identical to that of Perl5 in the abstract, but
68 * not in terms of actual data structures. The Perl5Debug class allows
69 * the bytecode program contained by a Perl5Pattern to be printed out for
70 * comparison with the program generated by Perl5 with the -r option.
71 *
72 * @version @version@
73 * @since 1.0
74 * @see Perl5Pattern
75 */
76 public final class Perl5Debug {
77
78 /**
79 * A dummy constructor to prevent instantiation of Perl5Debug.
80 */
81 private Perl5Debug() { }
82
83
84 /**
85 * This method prints to a String the bytecode program contained in a
86 * Perl5Pattern._ The program byte codes are identical to those
87 * generated by Perl5 with the -r option, but the offsets are
88 * different due to the different data structures used. This
89 * method is useful for diagnosing suspected bugs. The Perl5Compiler
90 * class is designed to produce regular expression programs identical
91 * to those produced by Perl5. By comparing the output of this method
92 * and the output of Perl5 with the -r option on the same regular
93 * expression, you can determine if Perl5Compiler correctly compiled
94 * an expression.
95 * <p>
96 * @param regexp The Perl5Pattern to print.
97 * @return A string representation of the bytecode program defining the
98 * regular expression.
99 */
100
101
102 public static String printProgram(Perl5Pattern regexp) {
103 StringBuffer buffer;
104 char operator = OpCode._OPEN, prog[];
105 int offset, next;
106
107 prog = regexp._program;
108 offset = 1;
109 buffer = new StringBuffer();
110
111 while(operator != OpCode._END) {
112 operator = prog[offset];
113 buffer.append(offset);
114 _printOperator(prog, offset, buffer);
115
116 next = OpCode._getNext(prog, offset);
117 offset+=OpCode._operandLength[operator];
118
119 buffer.append("(" + next + ")");
120
121 offset+=2;
122
123 if(operator == OpCode._ANYOF) {
124 offset += 16;
125 } else if(operator == OpCode._ANYOFUN || operator == OpCode._NANYOFUN) {
126 while(prog[offset] != OpCode._END) {
127 if(prog[offset] == OpCode._RANGE)
128 offset+=3;
129 else
130 offset+=2;
131 }
132 ++offset;
133 } else if(operator == OpCode._EXACTLY) {
134 ++offset;
135 buffer.append(" <");
136
137 //while(prog[offset] != '0')
138 while(prog[offset] != CharStringPointer._END_OF_STRING) {
139 //while(prog[offset] != 0 &&
140 // prog[offset] != CharStringPointer._END_OF_STRING) {
141 buffer.append(prog[offset]);
142 ++offset;
143 }
144 buffer.append(">");
145 ++offset;
146 }
147
148 buffer.append('\n');
149 }
150
151 // Can print some other stuff here.
152 if(regexp._startString != null)
153 buffer.append("start `" + new String(regexp._startString) + "' ");
154 if(regexp._startClassOffset != OpCode._NULL_OFFSET) {
155 buffer.append("stclass `");
156 _printOperator(prog, regexp._startClassOffset, buffer);
157 buffer.append("' ");
158 }
159 if((regexp._anchor & Perl5Pattern._OPT_ANCH) != 0)
160 buffer.append("anchored ");
161 if((regexp._anchor & Perl5Pattern._OPT_SKIP) != 0)
162 buffer.append("plus ");
163 if((regexp._anchor & Perl5Pattern._OPT_IMPLICIT) != 0)
164 buffer.append("implicit ");
165 if(regexp._mustString != null)
166 buffer.append("must have \""+ new String(regexp._mustString) +
167 "\" back " + regexp._back + " ");
168 buffer.append("minlen " + regexp._minLength + '\n');
169
170 return buffer.toString();
171 }
172
173
174 static void _printOperator(char[] program, int offset, StringBuffer buffer) {
175 String str = null;
176
177 buffer.append(":");
178
179 switch(program[offset]) {
180 case OpCode._BOL : str = "BOL"; break;
181 case OpCode._MBOL : str = "MBOL"; break;
182 case OpCode._SBOL : str = "SBOL"; break;
183 case OpCode._EOL : str = "EOL"; break;
184 case OpCode._MEOL : str = "MEOL"; break;
185 case OpCode._ANY : str = "ANY"; break;
186 case OpCode._SANY : str = "SANY"; break;
187 case OpCode._ANYOF : str = "ANYOF"; break;
188 case OpCode._ANYOFUN : str = "ANYOFUN"; break;
189 case OpCode._NANYOFUN : str = "NANYOFUN"; break;
190 /*
191 case OpCode._ANYOF : // debug
192 buffer.append("ANYOF\n\n");
193 int foo = OpCode._OPERAND(offset);
194 char ch;
195 for(ch=0; ch < 256; ch++) {
196 if(ch % 16 == 0)
197 buffer.append(" ");
198 buffer.append((program[foo + (ch >> 4)] &
199 (1 << (ch & 0xf))) == 0 ? 0 : 1);
200 }
201 buffer.append("\n\n");
202 break;
203 */
204 case OpCode._BRANCH : str = "BRANCH"; break;
205 case OpCode._EXACTLY : str = "EXACTLY"; break;
206 case OpCode._NOTHING : str = "NOTHING"; break;
207 case OpCode._BACK : str = "BACK"; break;
208 case OpCode._END : str = "END"; break;
209 case OpCode._ALNUM : str = "ALNUM"; break;
210 case OpCode._NALNUM : str = "NALNUM"; break;
211 case OpCode._BOUND : str = "BOUND"; break;
212 case OpCode._NBOUND : str = "NBOUND"; break;
213 case OpCode._SPACE : str = "SPACE"; break;
214 case OpCode._NSPACE : str = "NSPACE"; break;
215 case OpCode._DIGIT : str = "DIGIT"; break;
216 case OpCode._NDIGIT : str = "NDIGIT"; break;
217 case OpCode._ALPHA : str = "ALPHA"; break;
218 case OpCode._BLANK : str = "BLANK"; break;
219 case OpCode._CNTRL : str = "CNTRL"; break;
220 case OpCode._GRAPH : str = "GRAPH"; break;
221 case OpCode._LOWER : str = "LOWER"; break;
222 case OpCode._PRINT : str = "PRINT"; break;
223 case OpCode._PUNCT : str = "PUNCT"; break;
224 case OpCode._UPPER : str = "UPPER"; break;
225 case OpCode._XDIGIT : str = "XDIGIT"; break;
226 case OpCode._ALNUMC : str = "ALNUMC"; break;
227 case OpCode._ASCII : str = "ASCII"; break;
228 case OpCode._CURLY :
229 buffer.append("CURLY {");
230 buffer.append((int)OpCode._getArg1(program, offset));
231 buffer.append(','); buffer.append((int)OpCode._getArg2(program, offset));
232 buffer.append('}');
233 break;
234 case OpCode._CURLYX:
235 buffer.append("CURLYX {");
236 buffer.append((int)OpCode._getArg1(program, offset));
237 buffer.append(','); buffer.append((int)OpCode._getArg2(program, offset));
238 buffer.append('}');
239 break;
240 case OpCode._REF:
241 buffer.append("REF"); buffer.append((int)OpCode._getArg1(program, offset));
242 break;
243 case OpCode._OPEN:
244 buffer.append("OPEN"); buffer.append((int)OpCode._getArg1(program, offset));
245 break;
246 case OpCode._CLOSE:
247 buffer.append("CLOSE"); buffer.append((int)OpCode._getArg1(program, offset));
248 break;
249 case OpCode._STAR : str = "STAR"; break;
250 case OpCode._PLUS : str = "PLUS"; break;
251 case OpCode._MINMOD : str = "MINMOD"; break;
252 case OpCode._GBOL : str = "GBOL"; break;
253 case OpCode._UNLESSM: str = "UNLESSM"; break;
254 case OpCode._IFMATCH: str = "IFMATCH"; break;
255 case OpCode._SUCCEED: str = "SUCCEED"; break;
256 case OpCode._WHILEM : str = "WHILEM"; break;
257 default:
258 buffer.append("Operator is unrecognized. Faulty expression code!");
259 break;
260 }
261
262 if(str != null)
263 buffer.append(str);
264 }
265 }