Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/apache/oro/text/regex/Perl5Debug.java


1   /*
2    * $Id: Perl5Debug.java,v 1.11 2003/11/07 20:16:25 dfs Exp $
3    *
4    * ====================================================================
5    * The Apache Software License, Version 1.1
6    *
7    * Copyright (c) 2000 The Apache Software Foundation.  All rights
8    * reserved.
9    *
10   * Redistribution and use in source and binary forms, with or without
11   * modification, are permitted provided that the following conditions
12   * are met:
13   *
14   * 1. Redistributions of source code must retain the above copyright
15   *    notice, this list of conditions and the following disclaimer.
16   *
17   * 2. Redistributions in binary form must reproduce the above copyright
18   *    notice, this list of conditions and the following disclaimer in
19   *    the documentation and/or other materials provided with the
20   *    distribution.
21   *
22   * 3. The end-user documentation included with the redistribution,
23   *    if any, must include the following acknowledgment:
24   *       "This product includes software developed by the
25   *        Apache Software Foundation (http://www.apache.org/)."
26   *    Alternately, this acknowledgment may appear in the software itself,
27   *    if and wherever such third-party acknowledgments normally appear.
28   *
29   * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
30   *    must not be used to endorse or promote products derived from this
31   *    software without prior written permission. For written
32   *    permission, please contact apache@apache.org.
33   *
34   * 5. Products derived from this software may not be called "Apache" 
35   *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
36   *    name, without prior written permission of the Apache Software Foundation.
37   *
38   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49   * SUCH DAMAGE.
50   * ====================================================================
51   *
52   * This software consists of voluntary contributions made by many
53   * individuals on behalf of the Apache Software Foundation.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  
59  package org.apache.oro.text.regex;
60  
61  
62  /**
63   * The Perl5Debug class is not intended for general use and should not
64   * be instantiated, but is provided because some users may find the output
65   * of its single method to be useful.
66   * The Perl5Compiler class generates a representation of a
67   * regular expression identical to that of Perl5 in the abstract, but
68   * not in terms of actual data structures.  The Perl5Debug class allows
69   * the bytecode program contained by a Perl5Pattern to be printed out for
70   * comparison with the program generated by Perl5 with the -r option.
71   *
72   * @version @version@
73   * @since 1.0
74   * @see Perl5Pattern
75   */
76  public final class Perl5Debug {
77  
78    /**
79     * A dummy constructor to prevent instantiation of Perl5Debug.
80     */
81    private Perl5Debug() { }
82  
83  
84    /**
85     * This method prints to a String the bytecode program contained in a
86     * Perl5Pattern._  The program byte codes are identical to those
87     * generated by Perl5 with the -r option, but the offsets are
88     * different due to the different data structures used.  This
89     * method is useful for diagnosing suspected bugs.  The Perl5Compiler
90     * class is designed to produce regular expression programs identical
91     * to those produced by Perl5.  By comparing the output of this method
92     * and the output of Perl5 with the -r option on the same regular
93     * expression, you can determine if Perl5Compiler correctly compiled
94     * an expression.
95     * <p>
96     * @param regexp  The Perl5Pattern to print.
97     * @return A string representation of the bytecode program defining the
98     *         regular expression.
99     */
100 
101 
102   public static String printProgram(Perl5Pattern regexp) {
103     StringBuffer buffer;
104     char operator = OpCode._OPEN, prog[];
105     int offset, next;
106 
107     prog = regexp._program;
108     offset = 1;
109     buffer = new StringBuffer();
110 
111     while(operator != OpCode._END) {
112       operator = prog[offset];
113       buffer.append(offset);
114       _printOperator(prog, offset, buffer);
115 
116       next = OpCode._getNext(prog, offset);
117       offset+=OpCode._operandLength[operator];
118 
119       buffer.append("(" + next + ")");
120 
121       offset+=2;
122 
123       if(operator == OpCode._ANYOF) {
124   offset += 16;
125       } else if(operator == OpCode._ANYOFUN || operator == OpCode._NANYOFUN) {
126   while(prog[offset] != OpCode._END) {
127     if(prog[offset] == OpCode._RANGE)
128       offset+=3;
129     else
130       offset+=2;
131   }
132   ++offset;
133       } else if(operator == OpCode._EXACTLY) {
134     ++offset;
135     buffer.append(" <");
136 
137   //while(prog[offset] != '0')
138   while(prog[offset] != CharStringPointer._END_OF_STRING) {
139     //while(prog[offset] != 0 &&
140     //  prog[offset] != CharStringPointer._END_OF_STRING) {
141     buffer.append(prog[offset]);
142     ++offset;
143   }
144   buffer.append(">");
145   ++offset;
146       }
147 
148       buffer.append('\n');
149     }
150 
151     // Can print some other stuff here.
152     if(regexp._startString != null)
153       buffer.append("start `" + new String(regexp._startString) + "' ");
154     if(regexp._startClassOffset != OpCode._NULL_OFFSET) {
155       buffer.append("stclass `");
156       _printOperator(prog, regexp._startClassOffset, buffer);
157       buffer.append("' ");
158     }
159     if((regexp._anchor & Perl5Pattern._OPT_ANCH) != 0)
160       buffer.append("anchored ");
161     if((regexp._anchor & Perl5Pattern._OPT_SKIP) != 0)
162       buffer.append("plus ");
163     if((regexp._anchor & Perl5Pattern._OPT_IMPLICIT) != 0)
164       buffer.append("implicit ");
165     if(regexp._mustString != null)
166       buffer.append("must have \""+ new String(regexp._mustString) +
167            "\" back " + regexp._back + " ");
168     buffer.append("minlen " + regexp._minLength + '\n');
169 
170     return buffer.toString();
171   }
172 
173 
174   static void _printOperator(char[] program, int offset, StringBuffer buffer) {
175     String str = null;
176 
177     buffer.append(":");
178 
179     switch(program[offset]) {
180     case OpCode._BOL      : str = "BOL"; break;
181     case OpCode._MBOL     : str = "MBOL"; break;
182     case OpCode._SBOL     : str = "SBOL"; break;
183     case OpCode._EOL      : str = "EOL"; break;
184     case OpCode._MEOL     : str = "MEOL"; break;
185     case OpCode._ANY      : str = "ANY"; break;
186     case OpCode._SANY     : str = "SANY"; break;
187     case OpCode._ANYOF    : str = "ANYOF"; break;
188     case OpCode._ANYOFUN  : str = "ANYOFUN"; break;
189     case OpCode._NANYOFUN : str = "NANYOFUN"; break;
190       /*
191     case OpCode._ANYOF : // debug
192       buffer.append("ANYOF\n\n");
193       int foo = OpCode._OPERAND(offset);
194       char ch;
195       for(ch=0; ch < 256; ch++) {
196   if(ch % 16 == 0)
197     buffer.append(" ");
198   buffer.append((program[foo + (ch >> 4)] &
199            (1 << (ch & 0xf))) == 0 ? 0 : 1);
200       }
201       buffer.append("\n\n");
202       break;
203       */
204     case OpCode._BRANCH  : str = "BRANCH"; break;
205     case OpCode._EXACTLY : str = "EXACTLY"; break;
206     case OpCode._NOTHING : str = "NOTHING"; break;
207     case OpCode._BACK    : str = "BACK"; break;
208     case OpCode._END     : str = "END"; break;
209     case OpCode._ALNUM   : str = "ALNUM"; break;
210     case OpCode._NALNUM  : str = "NALNUM"; break;
211     case OpCode._BOUND   : str = "BOUND"; break;
212     case OpCode._NBOUND  : str = "NBOUND"; break;
213     case OpCode._SPACE   : str = "SPACE"; break;
214     case OpCode._NSPACE  : str = "NSPACE"; break;
215     case OpCode._DIGIT   : str = "DIGIT"; break;
216     case OpCode._NDIGIT  : str = "NDIGIT"; break;
217     case OpCode._ALPHA   : str = "ALPHA"; break;
218     case OpCode._BLANK   : str = "BLANK"; break;
219     case OpCode._CNTRL   : str = "CNTRL"; break;
220     case OpCode._GRAPH   : str = "GRAPH"; break;
221     case OpCode._LOWER   : str = "LOWER"; break;
222     case OpCode._PRINT   : str = "PRINT"; break;
223     case OpCode._PUNCT   : str = "PUNCT"; break;
224     case OpCode._UPPER   : str = "UPPER"; break;
225     case OpCode._XDIGIT  : str = "XDIGIT"; break;
226     case OpCode._ALNUMC  : str = "ALNUMC"; break;
227     case OpCode._ASCII   : str = "ASCII"; break;
228     case OpCode._CURLY :
229       buffer.append("CURLY {");
230       buffer.append((int)OpCode._getArg1(program, offset));
231       buffer.append(','); buffer.append((int)OpCode._getArg2(program, offset));
232       buffer.append('}');
233       break;
234     case OpCode._CURLYX:
235       buffer.append("CURLYX {");
236       buffer.append((int)OpCode._getArg1(program, offset));
237       buffer.append(','); buffer.append((int)OpCode._getArg2(program, offset));
238       buffer.append('}');
239       break;
240     case OpCode._REF:
241       buffer.append("REF"); buffer.append((int)OpCode._getArg1(program, offset));
242       break;
243     case OpCode._OPEN:
244       buffer.append("OPEN"); buffer.append((int)OpCode._getArg1(program, offset));
245       break;
246     case OpCode._CLOSE:
247       buffer.append("CLOSE"); buffer.append((int)OpCode._getArg1(program, offset));
248       break;
249     case OpCode._STAR   : str = "STAR"; break;
250     case OpCode._PLUS   : str = "PLUS"; break;
251     case OpCode._MINMOD : str = "MINMOD"; break;
252     case OpCode._GBOL   : str = "GBOL"; break;
253     case OpCode._UNLESSM: str = "UNLESSM"; break;
254     case OpCode._IFMATCH: str = "IFMATCH"; break;
255     case OpCode._SUCCEED: str = "SUCCEED"; break;
256     case OpCode._WHILEM : str = "WHILEM"; break;
257     default:
258       buffer.append("Operator is unrecognized.  Faulty expression code!");
259       break;
260     }
261     
262     if(str != null)
263       buffer.append(str);
264   }
265 }