Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/apache/oro/text/regex/Perl5Compiler.java


1   /*
2    * $Id: Perl5Compiler.java,v 1.21 2003/11/07 20:16:25 dfs Exp $
3    *
4    * ====================================================================
5    * The Apache Software License, Version 1.1
6    *
7    * Copyright (c) 2000 The Apache Software Foundation.  All rights
8    * reserved.
9    *
10   * Redistribution and use in source and binary forms, with or without
11   * modification, are permitted provided that the following conditions
12   * are met:
13   *
14   * 1. Redistributions of source code must retain the above copyright
15   *    notice, this list of conditions and the following disclaimer.
16   *
17   * 2. Redistributions in binary form must reproduce the above copyright
18   *    notice, this list of conditions and the following disclaimer in
19   *    the documentation and/or other materials provided with the
20   *    distribution.
21   *
22   * 3. The end-user documentation included with the redistribution,
23   *    if any, must include the following acknowledgment:
24   *       "This product includes software developed by the
25   *        Apache Software Foundation (http://www.apache.org/)."
26   *    Alternately, this acknowledgment may appear in the software itself,
27   *    if and wherever such third-party acknowledgments normally appear.
28   *
29   * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
30   *    must not be used to endorse or promote products derived from this
31   *    software without prior written permission. For written
32   *    permission, please contact apache@apache.org.
33   *
34   * 5. Products derived from this software may not be called "Apache" 
35   *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
36   *    name, without prior written permission of the Apache Software Foundation.
37   *
38   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49   * SUCH DAMAGE.
50   * ====================================================================
51   *
52   * This software consists of voluntary contributions made by many
53   * individuals on behalf of the Apache Software Foundation.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  
59  package org.apache.oro.text.regex;
60  
61  import java.util.*;
62  
63  /**
64   * The Perl5Compiler class is used to create compiled regular expressions
65   * conforming to the Perl5 regular expression syntax.  It generates
66   * Perl5Pattern instances upon compilation to be used in conjunction
67   * with a Perl5Matcher instance.  Please see the user's guide for more 
68   * information about Perl5 regular expressions.
69   * <p>
70   * Perl5Compiler and Perl5Matcher are designed with the intent that
71   * you use a separate instance of each per thread to avoid the overhead
72   * of both synchronization and concurrent access (e.g., a match that takes
73   * a long time in one thread will block the progress of another thread with
74   * a shorter match).  If you want to use a single instance of each
75   * in a concurrent program, you must appropriately protect access to
76   * the instances with critical sections.  If you want to share Perl5Pattern
77   * instances between concurrently executing instances of Perl5Matcher, you
78   * must compile the patterns with {@link Perl5Compiler#READ_ONLY_MASK}.
79   *
80   * @version @version@
81   * @since 1.0
82   * @see PatternCompiler
83   * @see MalformedPatternException
84   * @see Perl5Pattern
85   * @see Perl5Matcher
86   */
87  
88  public final class Perl5Compiler implements PatternCompiler {
89    private static final int __WORSTCASE = 0, __NONNULL = 0x1, __SIMPLE = 0x2,
90                             __SPSTART = 0x4, __TRYAGAIN = 0x8;
91  
92    private static final char
93      __CASE_INSENSITIVE = 0x0001,
94      __GLOBAL           = 0x0002,
95      __KEEP             = 0x0004,
96      __MULTILINE        = 0x0008,
97      __SINGLELINE       = 0x0010,
98      __EXTENDED         = 0x0020,
99      __READ_ONLY        = 0x8000;
100 
101   private static final String __HEX_DIGIT =
102   "0123456789abcdef0123456789ABCDEFx";
103   private CharStringPointer __input;
104   private boolean __sawBackreference;
105   private char[] __modifierFlags = { 0 };
106 
107   // IMPORTANT: __numParentheses starts out equal to 1 during compilation.
108   // It is always one greater than the number of parentheses encountered
109   // so far in the regex.  That is because it refers to the number of groups
110   // to save, and the entire match is always saved (group 0)
111   private int __numParentheses, __programSize, __cost;
112 
113   // When doing the second pass and actually generating code, __programSize
114   // keeps track of the current offset.
115   private char[] __program;
116 
117   /** Lookup table for POSIX character class names */
118   private static final HashMap __hashPOSIX;
119   
120   static {
121     __hashPOSIX = new HashMap();
122     __hashPOSIX.put("alnum",     new Character(OpCode._ALNUMC));
123     __hashPOSIX.put("word",      new Character(OpCode._ALNUM));
124     __hashPOSIX.put("alpha",     new Character(OpCode._ALPHA));
125     __hashPOSIX.put("blank",     new Character(OpCode._BLANK));
126     __hashPOSIX.put("cntrl",     new Character(OpCode._CNTRL));
127     __hashPOSIX.put("digit",     new Character(OpCode._DIGIT));
128     __hashPOSIX.put("graph",     new Character(OpCode._GRAPH));
129     __hashPOSIX.put("lower",     new Character(OpCode._LOWER));
130     __hashPOSIX.put("print",     new Character(OpCode._PRINT));
131     __hashPOSIX.put("punct",     new Character(OpCode._PUNCT));
132     __hashPOSIX.put("space",     new Character(OpCode._SPACE));
133     __hashPOSIX.put("upper",     new Character(OpCode._UPPER));
134     __hashPOSIX.put("xdigit",    new Character(OpCode._XDIGIT));
135     __hashPOSIX.put("ascii",     new Character(OpCode._ASCII));
136   }
137 
138 
139   /**
140    * The default mask for the {@link #compile compile} methods.
141    * It is equal to 0.
142    * The default behavior is for a regular expression to be case sensitive
143    * and to not specify if it is multiline or singleline.  When MULTILINE_MASK
144    * and SINGLELINE_MASK are not defined, the <b>^</b>, <b>$</b>, and <b>.</b>
145    * metacharacters are
146    * interpreted according to the value of isMultiline() in Perl5Matcher.
147    * The default behavior of Perl5Matcher is to treat the Perl5Pattern
148    * as though MULTILINE_MASK were enabled.  If isMultiline() returns false,
149    * then the pattern is treated as though SINGLELINE_MASK were set.  However,
150    * compiling a pattern with the MULTILINE_MASK or SINGLELINE_MASK masks
151    * will ALWAYS override whatever behavior is specified by the setMultiline()
152    * in Perl5Matcher.
153    */
154   public static final int DEFAULT_MASK          = 0;
155 
156   /**
157    * A mask passed as an option to the {@link #compile compile} methods
158    * to indicate a compiled regular expression should be case insensitive.
159    */
160   public static final int CASE_INSENSITIVE_MASK = __CASE_INSENSITIVE;
161 
162   /**
163    * A mask passed as an option to the  {@link #compile compile} methods
164    * to indicate a compiled regular expression should treat input as having
165    * multiple lines.  This option affects the interpretation of
166    * the <b>^</b> and <b>$</b> metacharacters.  When this mask is used,
167    * the <b>^</b> metacharacter matches at the beginning of every line,
168    * and the <b>$</b> metacharacter matches at the end of every line.
169    * Additionally the <b> . </b> metacharacter will not match newlines when
170    * an expression is compiled with <b> MULTILINE_MASK </b>, which is its
171    * default behavior.
172    */
173   public static final int MULTILINE_MASK        = __MULTILINE;
174 
175   /**
176    * A mask passed as an option to the {@link #compile compile} methods
177    * to indicate a compiled regular expression should treat input as being
178    * a single line.  This option affects the interpretation of
179    * the <b>^</b> and <b>$</b> metacharacters.  When this mask is used,
180    * the <b>^</b> metacharacter matches at the beginning of the input,
181    * and the <b>$</b> metacharacter matches at the end of the input.
182    * The <b>^</b> and <b>$</b> metacharacters will not match at the beginning
183    * and end of lines occurring between the beginning and end of the input.
184    * Additionally, the <b> . </b> metacharacter will match newlines when
185    * an expression is compiled with <b> SINGLELINE_MASK </b>, unlike its
186    * default behavior.
187    */
188   public static final int SINGLELINE_MASK       = __SINGLELINE;
189 
190   /**
191    * A mask passed as an option to the {@link #compile compile} methods
192    * to indicate a compiled regular expression should be treated as a Perl5
193    * extended pattern (i.e., a pattern using the <b>/x</b> modifier).  This 
194    * option tells the compiler to ignore whitespace that is not backslashed or
195    * within a character class.  It also tells the compiler to treat the
196    * <b>#</b> character as a metacharacter introducing a comment as in
197    * Perl.  In other words, the <b>#</b> character will comment out any
198    * text in the regular expression between it and the next newline.
199    * The intent of this option is to allow you to divide your patterns
200    * into more readable parts.  It is provided to maintain compatibility
201    * with Perl5 regular expressions, although it will not often
202    * make sense to use it in Java.
203    */
204   public static final int EXTENDED_MASK         = __EXTENDED;
205 
206   /**
207    * A mask passed as an option to the {@link #compile compile} methods
208    * to indicate that the resulting Perl5Pattern should be treated as a
209    * read only data structure by Perl5Matcher, making it safe to share
210    * a single Perl5Pattern instance among multiple threads without needing
211    * synchronization.  Without this option, Perl5Matcher reserves the right
212    * to store heuristic or other information in Perl5Pattern that might
213    * accelerate future matches.  When you use this option, Perl5Matcher will
214    * not store or modify any information in a Perl5Pattern.  Use this option
215    * when you want to share a Perl5Pattern instance among multiple threads
216    * using different Perl5Matcher instances.
217    */
218   public static final int READ_ONLY_MASK        = __READ_ONLY;
219 
220   /**
221    * Given a character string, returns a Perl5 expression that interprets
222    * each character of the original string literally.  In other words, all
223    * special metacharacters are quoted/escaped.  This method is useful for
224    * converting user input meant for literal interpretation into a safe
225    * regular expression representing the literal input.
226    * <p>
227    * In effect, this method is the analog of the Perl5 quotemeta() builtin
228    * method.
229    * <p>
230    * @param expression The expression to convert.
231    * @return A String containing a Perl5 regular expression corresponding to
232    *         a literal interpretation of the pattern.
233    */
234   public static final String quotemeta(char[] expression) {
235     int ch;
236     StringBuffer buffer;
237 
238     buffer = new StringBuffer(2*expression.length);
239     for(ch = 0; ch < expression.length; ch++) {
240       if(!OpCode._isWordCharacter(expression[ch]))
241   buffer.append('\\');
242       buffer.append(expression[ch]);
243     }
244 
245     return buffer.toString();
246   }
247 
248   /**
249    * Given a character string, returns a Perl5 expression that interprets
250    * each character of the original string literally.  In other words, all
251    * special metacharacters are quoted/escaped.  This method is useful for
252    * converting user input meant for literal interpretation into a safe
253    * regular expression representing the literal input.
254    * <p>
255    * In effect, this method is the analog of the Perl5 quotemeta() builtin
256    * method.
257    * <p>
258    * @param pattern The pattern to convert.
259    * @return A String containing a Perl5 regular expression corresponding to
260    *         a literal interpretation of the pattern.
261    */
262   public static final String quotemeta(String expression) {
263     return quotemeta(expression.toCharArray());
264   }
265 
266   private static boolean __isSimpleRepetitionOp(char ch) {
267     return (ch == '*' || ch == '+' || ch == '?');
268   }
269 
270   private static boolean __isComplexRepetitionOp(char[] ch, int offset) {
271     if(offset < ch.length && offset >= 0)
272        return (ch[offset] == '*' || ch[offset] == '+' || ch[offset] == '?'
273          || (ch[offset] == '{' && __parseRepetition(ch, offset)));
274     return false;
275   }
276 
277   // determines if {\d+,\d*} is the next part of the string
278   private static boolean __parseRepetition(char[] str, int offset) {
279     if(str[offset] != '{')
280       return false;
281     ++offset;
282 
283     if(offset >= str.length || !Character.isDigit(str[offset]))
284       return false;
285 
286     while(offset < str.length && Character.isDigit(str[offset]))
287       ++offset;
288 
289     if(offset < str.length && str[offset] == ',')
290       ++offset;
291 
292     while(offset < str.length && Character.isDigit(str[offset]))
293       ++offset;
294 
295     if(offset >= str.length || str[offset] != '}')
296       return false;
297 
298     return true;
299   }
300 
301   private static int __parseHex(char[] str, int offset, int maxLength,
302         int[] scanned)
303   {
304     int val = 0, index;
305 
306     scanned[0] = 0;
307     while(offset < str.length && maxLength-- > 0 &&
308     (index = __HEX_DIGIT.indexOf(str[offset])) != -1) {
309       val <<= 4;
310       val |= (index & 15);
311       ++offset;
312       ++scanned[0];
313     }
314 
315     return val;
316   }
317 
318   private static int __parseOctal(char[] str, int offset, int maxLength,
319          int[] scanned)
320   {
321     int val = 0;
322 
323     scanned[0] = 0;
324     while(offset < str.length && 
325     maxLength > 0 && str[offset] >= '0' && str[offset] <= '7') {
326       val <<= 3;
327       val |= (str[offset] - '0');
328       --maxLength;
329       ++offset;
330       ++scanned[0];
331     }
332 
333     return val;
334   }
335 
336   private static void __setModifierFlag(char[] flags, char ch) {
337     switch(ch) {
338     case 'i' : flags[0] |= __CASE_INSENSITIVE; return;
339     case 'g' : flags[0] |= __GLOBAL; return;
340     case 'o' : flags[0] |= __KEEP; return;
341     case 'm' : flags[0] |= __MULTILINE; return;
342     case 's' : flags[0] |= __SINGLELINE; return;
343     case 'x' : flags[0] |= __EXTENDED; return;
344     }
345   }
346 
347   // Emit a specific character code.
348   private void __emitCode(char code) {
349 
350     if(__program != null)
351       __program[__programSize] = code;
352 
353     ++__programSize;
354   }
355 
356 
357   // Emit an operator with no arguments.
358   // Return an offset into the __program array as a pointer to node.
359   private int __emitNode(char operator) {
360     int offset;
361 
362     offset = __programSize;
363 
364     if(__program == null)
365       __programSize+=2;
366     else {
367       __program[__programSize++] = operator;
368       __program[__programSize++] = OpCode._NULL_POINTER;
369     }
370 
371     return offset;
372   }
373 
374 
375   // Emit an operator with arguments.
376   // Return an offset into the __programarray as a pointer to node.
377   private int __emitArgNode(char operator, char arg) {
378     int offset;
379 
380     offset = __programSize;
381 
382     if(__program== null)
383       __programSize+=3;
384     else {
385       __program[__programSize++] = operator;
386       __program[__programSize++] = OpCode._NULL_POINTER;
387       __program[__programSize++] = arg;
388     }
389 
390     return offset;
391   }
392 
393 
394   // Insert an operator at a given offset.
395   private void __programInsertOperator(char operator, int operand) {
396     int src, dest, offset;
397 
398     offset = (OpCode._opType[operator] == OpCode._CURLY ? 2 : 0);
399 
400 
401     if(__program== null) {
402       __programSize+=(2 + offset);
403       return;
404     }
405 
406     src = __programSize;
407     __programSize+=(2 + offset);
408     dest = __programSize;
409 
410     while(src > operand) {
411       --src;
412       --dest;
413       __program[dest] = __program[src];
414     }
415 
416     __program[operand++] = operator;
417     __program[operand++] = OpCode._NULL_POINTER;
418 
419     while(offset-- > 0)
420       __program[operand++] = OpCode._NULL_POINTER;
421 
422   }
423 
424 
425 
426   private void __programAddTail(int current, int value) {
427     int scan, temp, offset;
428     if(__program == null || current == OpCode._NULL_OFFSET)
429       return;
430 
431     scan = current;
432 
433     while(true) {
434       temp = OpCode._getNext(__program, scan);
435       if(temp == OpCode._NULL_OFFSET)
436   break;
437       scan = temp;
438     }
439 
440     if(__program[scan] == OpCode._BACK)
441       offset = scan - value;
442     else
443       offset = value - scan;
444 
445     __program[scan + 1] = (char)offset;
446   }
447 
448 
449   private void __programAddOperatorTail(int current, int value) {
450     if(__program == null || current == OpCode._NULL_OFFSET ||
451        OpCode._opType[__program[current]] != OpCode._BRANCH)
452       return;
453     __programAddTail(OpCode._getNextOperator(current), value);
454   }
455 
456 
457   private char __getNextChar() {
458     char ret, value;
459 
460     ret = __input._postIncrement();
461 
462     while(true) {
463       value = __input._getValue();
464 
465       if(value == '(' && __input._getValueRelative(1) == '?' &&
466    __input._getValueRelative(2) == '#') {
467   // Skip comments
468   while(value != CharStringPointer._END_OF_STRING && value != ')')
469     value = __input._increment();
470   __input._increment();
471   continue;
472       }
473 
474       if((__modifierFlags[0] & __EXTENDED) != 0) {
475   if(Character.isWhitespace(value)) {
476     __input._increment();
477     continue;
478   } else if(value == '#') {
479     while(value != CharStringPointer._END_OF_STRING && value != '\n')
480       value = __input._increment();
481     __input._increment();
482     continue;
483   }
484       }
485 
486       return ret;
487     }
488 
489   }
490 
491 
492   private int __parseAlternation(int[] retFlags)
493     throws MalformedPatternException 
494   {
495     int chain, offset, latest;
496     int flags = 0;
497     char value;
498 
499     retFlags[0] = __WORSTCASE;
500 
501     offset = __emitNode(OpCode._BRANCH);
502 
503     chain  = OpCode._NULL_OFFSET;
504 
505     if(__input._getOffset() == 0) {
506       __input._setOffset(-1);
507       __getNextChar();
508     } else {
509       __input._decrement();
510       __getNextChar();
511     }
512 
513     value = __input._getValue();
514 
515     while(value != CharStringPointer._END_OF_STRING &&
516     value != '|' && value != ')') {
517       flags &= ~__TRYAGAIN;
518       latest = __parseBranch(retFlags);
519 
520       if(latest == OpCode._NULL_OFFSET) {
521   if((flags & __TRYAGAIN) != 0){
522     value = __input._getValue();
523     continue;
524   }
525   return OpCode._NULL_OFFSET;
526       }
527 
528       retFlags[0] |= (flags & __NONNULL);
529 
530       if(chain == OpCode._NULL_OFFSET)
531   retFlags[0] |= (flags & __SPSTART);
532       else {
533   ++__cost;
534   __programAddTail(chain, latest);
535       }
536       chain = latest;
537       value = __input._getValue();
538     }
539 
540     // If loop was never entered.
541     if(chain == OpCode._NULL_OFFSET)
542       __emitNode(OpCode._NOTHING);
543 
544     return offset;
545   }
546 
547 
548   private int __parseAtom(int[] retFlags) throws MalformedPatternException {
      // Parses the construct at the current input position and emits the
      // node for it.  retFlags[0] is reset to __WORSTCASE and then OR-ed
      // with __NONNULL, __SIMPLE and/or __SPSTART depending on what was
      // parsed.  Returns the program offset of the emitted node, or
      // OpCode._NULL_OFFSET when a parenthesized expression produced no
      // node or a '|' / ')' was met with __TRYAGAIN pending.
      // Throws MalformedPatternException for a quantifier that follows
      // nothing, an unexpected '|' or ')', an invalid backreference, or a
      // trailing backslash.
549     boolean doDefault;
550     char value;
551     int offset, flags[] = { 0 };
552     
553     
554     retFlags[0] = __WORSTCASE;
555     doDefault = false;
556     offset = OpCode._NULL_OFFSET;
557 
      // The loop only repeats through "continue tryAgain"; every completed
      // case leaves it with "break tryAgain".
558   tryAgain:
559     while(true) {
560 
561       value = __input._getValue();
562 
563       switch(value) {
564       case '^' :
565   __getNextChar();
566   // The order here is important in order to support /ms.
567   // /m takes precedence over /s for ^ and $, but not for .
568   if((__modifierFlags[0] & __MULTILINE) != 0)
569     offset = __emitNode(OpCode._MBOL);
570   else if((__modifierFlags[0] & __SINGLELINE) != 0)
571     offset = __emitNode(OpCode._SBOL);
572   else
573     offset = __emitNode(OpCode._BOL);
574   break tryAgain;
575 
576       case '$':
577   __getNextChar();
578   // The order here is important in order to support /ms.
579   // /m takes precedence over /s for ^ and $, but not for .
580   if((__modifierFlags[0] & __MULTILINE) != 0)
581     offset = __emitNode(OpCode._MEOL);
582   else if((__modifierFlags[0] & __SINGLELINE) != 0)
583     offset = __emitNode(OpCode._SEOL);
584   else
585     offset = __emitNode(OpCode._EOL);
586   break tryAgain;
587 
588       case '.':
589   __getNextChar();
590   // The order here is important in order to support /ms.
591   // /m takes precedence over /s for ^ and $, but not for .
592   if((__modifierFlags[0] & __SINGLELINE) != 0)
593     offset = __emitNode(OpCode._SANY);
594   else
595     offset = __emitNode(OpCode._ANY);
596   ++__cost;
597   retFlags[0] |= (__NONNULL | __SIMPLE);
598   break tryAgain;
599 
600       case '[':
        // Character class: step over the '[' and hand the rest to
        // __parseUnicodeClass.
601   __input._increment();
602   offset = __parseUnicodeClass();
603   retFlags[0] |= (__NONNULL | __SIMPLE);
604   break tryAgain;
605 
606       case '(':
        // Parenthesized expression.  A null result with __TRYAGAIN set in
        // the local flags restarts the switch at the new input position.
607   __getNextChar();
608   offset = __parseExpression(true, flags);
609   if(offset == OpCode._NULL_OFFSET) {
610     if((flags[0] & __TRYAGAIN) != 0)
611       continue tryAgain;
612     return OpCode._NULL_OFFSET;
613   }
614   retFlags[0] |= (flags[0] & (__NONNULL | __SPSTART));
615   break tryAgain;
616 
617       case '|':
618       case ')':
619   if((flags[0] & __TRYAGAIN) != 0) {
620     retFlags[0] |= __TRYAGAIN;
621     return OpCode._NULL_OFFSET;
622   }
623 
624   throw new MalformedPatternException("Error in expression at " +
625            __input._toString(__input._getOffset()));
626   //break tryAgain;
627 
628       case '?':
629       case '+':
630       case '*':
631   throw new MalformedPatternException(
632                  "?+* follows nothing in expression");
633   //break tryAgain;
634 
635       case '\\':
        // Move onto the character after the backslash and dispatch on it.
636   value = __input._increment();
637 
638   switch(value) {
639   case 'A' :
640     offset = __emitNode(OpCode._SBOL);
641     retFlags[0] |= __SIMPLE;
642     __getNextChar();
643     break;
644   case 'G':
645     offset = __emitNode(OpCode._GBOL);
646     retFlags[0] |= __SIMPLE;
647     __getNextChar();
648     break;
649   case 'Z':
650     offset = __emitNode(OpCode._SEOL);
651     retFlags[0] |= __SIMPLE;
652     __getNextChar();
653     break;
654   case 'w':
655     offset = __emitNode(OpCode._ALNUM);
656     retFlags[0] |= (__NONNULL | __SIMPLE);
657     __getNextChar();
658     break;
659   case 'W':
660     offset = __emitNode(OpCode._NALNUM);
661     retFlags[0] |= (__NONNULL | __SIMPLE);
662     __getNextChar();
663     break;
664   case 'b':
665     offset = __emitNode(OpCode._BOUND);
666     retFlags[0] |= __SIMPLE;
667     __getNextChar();
668     break;
669   case 'B':
670     offset = __emitNode(OpCode._NBOUND);
671     retFlags[0] |= __SIMPLE;
672     __getNextChar();
673     break;
674   case 's':
675     offset = __emitNode(OpCode._SPACE);
676     retFlags[0] |= (__NONNULL | __SIMPLE);
677     __getNextChar();
678     break;
679   case 'S':
680     offset = __emitNode(OpCode._NSPACE);
681     retFlags[0] |= (__NONNULL | __SIMPLE);
682     __getNextChar();
683     break;
684   case 'd':
685     offset = __emitNode(OpCode._DIGIT);
686     retFlags[0] |= (__NONNULL | __SIMPLE);
687     __getNextChar();
688     break;
689   case 'D':
690     offset = __emitNode(OpCode._NDIGIT);
691     retFlags[0] |= (__NONNULL | __SIMPLE);
692     __getNextChar();
693     break;
        // Escapes that denote a literal character are decoded by the
        // literal-run code after the loop (doDefault).
694   case 'n': case 'r': case 't': case 'f': case 'e': case 'a': case 'x':
695   case 'c': case '0':
696     doDefault = true;
697     break tryAgain;
698   case '1': case '2': case '3': case '4': case '5': case '6': case '7':
699   case '8': case '9':
700     int num;
701     StringBuffer buffer = new StringBuffer(10);
702 
          // Collect the whole run of digits after the backslash without
          // moving the input position.
703     num = 0;
704     value = __input._getValueRelative(num);
705 
706     while(Character.isDigit(value)) {
707       buffer.append(value);
708       ++num;
709       value = __input._getValueRelative(num);
710     }
711 
712     try {
713       num = Integer.parseInt(buffer.toString());
714     } catch(NumberFormatException e) {
715       throw new MalformedPatternException(
716      "Unexpected number format exception.  Please report this bug." +
717      "NumberFormatException message: " + e.getMessage());
718     }
719 
          // A number above 9 that does not name an existing group is left
          // to the literal-run code, which decodes it with __parseOctal.
720     if(num > 9 && num >= __numParentheses) {
721       doDefault = true;
722       break tryAgain;
723     } else {
724       // A backreference may only occur AFTER its group
725       if(num >= __numParentheses)
726         throw new MalformedPatternException("Invalid backreference: \\" +
727               num);
728       __sawBackreference = true;
729       offset = __emitArgNode(OpCode._REF, (char)num);
730       retFlags[0] |= __NONNULL;
731 
            // Move past the digits, then step back one and let
            // __getNextChar consume the last digit.
732       value = __input._getValue();
733       while(Character.isDigit(value))
734         value = __input._increment();
735 
736       __input._decrement();
737       __getNextChar();
738     }
739     break;
740   case '\0':
741   case CharStringPointer._END_OF_STRING:
742     if(__input._isAtEnd())
743       throw new
744         MalformedPatternException("Trailing \\ in expression.");
745 
746   // fall through to default
747   default:
748     doDefault = true;
749     break tryAgain;
750   }
751   break tryAgain;
752 
753       case '#':
754   // skip over comments
755   if((__modifierFlags[0] & __EXTENDED) != 0) {
756     while(!__input._isAtEnd() && __input._getValue() != '\n')
757       __input._increment();
758     if(!__input._isAtEnd())
759       continue tryAgain;
760   }
761   // fall through to default
762       default:
763   __input._increment();
764   doDefault = true;
765   break tryAgain;
766       }// end master switch
767     } // end tryAgain
768 
769 
      // Literal run: emit an EXACTLY node holding up to 127 literal
      // characters, with escapes decoded.
770     if(doDefault) {
771       char ender;
772       int length, pOffset, maxOffset, lastOffset, numLength[];
773 
774       offset = __emitNode(OpCode._EXACTLY);
775       // Not sure that it's ok to use 0 to mark end.
776       //__emitCode((char)0);
        // NOTE(review): presumably this is the slot that receives the run
        // length after the loop (via OpCode._getOperand(offset)); confirm.
777       __emitCode((char)CharStringPointer._END_OF_STRING);
778 
        // Scanning starts one position before the current input offset:
        // every path that set doDefault had already advanced the input by
        // one (past the backslash, or past the first literal character).
779     forLoop:
780       for(length = 0, pOffset = __input._getOffset() - 1,
781       maxOffset = __input._getLength();
782     length < 127 && pOffset < maxOffset; ++length) {
783 
784   lastOffset = pOffset;
785   value = __input._getValue(pOffset);
786 
787   switch(value) {
        // A metacharacter ends the literal run.
788   case '^': case '$': case '.': case '[': case '(': case ')':
789   case '|':
790     break forLoop;
791   case '\\':
792     value = __input._getValue(++pOffset);
793 
794     switch(value) {
          // Escapes that are atoms of their own end the run; pOffset is
          // moved back onto the backslash.
795     case 'A': case 'G': case 'Z': case 'w': case 'W': case 'b':
796     case 'B': case 's': case 'S': case 'd': case 'D':
797       --pOffset;
798       break forLoop;
799     case 'n':
800       ender = '\n';
801       ++pOffset;
802       break;
803     case 'r':
804       ender = '\r';
805       ++pOffset;
806       break;
807     case 't':
808       ender = '\t';
809       ++pOffset;
810       break;
811     case 'f':
812       ender = '\f';
813       ++pOffset;
814       break;
815     case 'e':
816       ender = '\033';
817       ++pOffset;
818       break;
819     case 'a':
820       ender = '\007';
821       ++pOffset;
822       break;
823     case 'x':
            // \x followed by at most two hex digits.
824       numLength = new int[1];
825       ender = (char)__parseHex(__input._array, ++pOffset, 2, numLength);
826       pOffset+=numLength[0];
827       break;
828     case 'c':
            // \c plus a character: upper-case it and flip bit 6.
829       ++pOffset;
830       ender = __input._getValue(pOffset++);
831       if(Character.isLowerCase(ender))
832         ender = Character.toUpperCase(ender);
833       ender ^= 64;
834       break;
835     case '0': case '1': case '2': case'3': case '4': case '5':
836     case '6': case '7': case '8': case '9':
            // A leading 0 always means octal.  Otherwise, when at least
            // two digits follow the backslash, the number is octal only if
            // it does not name an existing group; anything else ends the
            // run at the backslash.
837       boolean doOctal = false;
838       value = __input._getValue(pOffset);
839 
840       if(value == '0')
841         doOctal = true;
842       value = __input._getValue(pOffset + 1);
843 
844       if(Character.isDigit(value)) {
845         int num;
846         StringBuffer buffer = new StringBuffer(10);
847 
848         num = pOffset;
849         value = __input._getValue(num);
850 
851         while(Character.isDigit(value)){
852     buffer.append(value);
853     ++num;
854     value = __input._getValue(num);
855         }
856 
857         try {
858     num = Integer.parseInt(buffer.toString());
859         } catch(NumberFormatException e) {
860     throw new MalformedPatternException(
861        "Unexpected number format exception.  Please report this bug." +
862        "NumberFormatException message: " + e.getMessage());
863         }
864 
865         if(!doOctal)
866     doOctal = (num >= __numParentheses);
867       }
868 
869       if(doOctal) {
870         numLength = new int[1];
871         ender = (char)__parseOctal(__input._array, pOffset, 3, numLength);
872         pOffset+=numLength[0];
873       } else {
874         --pOffset;
875         break forLoop;
876       }
877       break;
878     case CharStringPointer._END_OF_STRING:
879     case '\0':
880       if(pOffset >= maxOffset)
881         throw new
882     MalformedPatternException("Trailing \\ in expression.");
883       // fall through to default
884     default:
            // Any other escaped character stands for itself.
885       ender = __input._getValue(pOffset++);
886       break;
887     } // end backslash switch
888     break;
889   case '#':
          // In extended mode skip up to the newline; the whitespace case
          // below then steps over the newline itself.
890     if((__modifierFlags[0] & __EXTENDED) != 0) {
891       while(pOffset < maxOffset && __input._getValue(pOffset) != '\n')
892         ++pOffset;
893     }
894     // fall through to whitespace handling
895   case ' ': case '\t': case '\n': case '\r': case '\f': case '\013':
          // In extended mode the character is skipped; --length cancels
          // the loop's ++length so it does not count.
896     if((__modifierFlags[0] & __EXTENDED) != 0) {
897       ++pOffset;
898       --length;
899       continue;
900     }
901     // fall through to default
902   default:
903     ender = __input._getValue(pOffset++);
904     break;
905 
906   }   // end master switch
907 
        // Case-insensitive patterns store upper-case literals lower-cased.
908   if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
909      Character.isUpperCase(ender))
910     ender = Character.toLowerCase(ender);
911 
        // A quantifier follows this character.  If earlier characters were
        // collected, rewind to the start of this one and leave it out of
        // the run; if it is the first, emit it alone.  Either way the run
        // ends here.
912   if(pOffset < maxOffset && __isComplexRepetitionOp(__input._array, pOffset)) {
913     if(length > 0)
914       pOffset = lastOffset;
915     else {
916       ++length;
917       __emitCode(ender);
918     }
919     break;
920   }
921 
922   __emitCode(ender);
923 
924 
925       } // end for loop
926 
927 
        // Put the input one before the stop position and let
        // __getNextChar move onto it, skipping comments on the way.
928       __input._setOffset(pOffset - 1);
929       __getNextChar();
930 
931       if(length < 0)
932   throw new MalformedPatternException(
933          "Unexpected compilation failure.  Please report this bug!");
934       if(length > 0)
935   retFlags[0] |= __NONNULL;
936       if(length == 1)
937   retFlags[0] |= __SIMPLE;
        // Store the run length in the node's operand slot (only when a
        // program array is present).
938       if(__program!= null)
939   __program[OpCode._getOperand(offset)] = (char)length;
940       //__emitCode('\0'); // debug
941       __emitCode(CharStringPointer._END_OF_STRING);
942     }
943 
944     return offset;
945   }
946 
947   // These are the original 8-bit character class handling methods.
948   // We don't want to delete them just yet only to have to dig it out
949   // of revision control later.
950   /*
951   // Set the bits in a character class.  Only recognizes ascii.
952   private void __setCharacterClassBits(char[] bits, int offset, char deflt,
953                char ch)
954   {
955     if(__program== null || ch >= 256)
956       return;
957     ch &= 0xffff;
958 
959     if(deflt == 0) {
960       bits[offset + (ch >> 4)] |= (1 << (ch & 0xf));
961     } else {
962       bits[offset + (ch >> 4)] &= ~(1 << (ch & 0xf));
963     }
964   }
965 
966   private int __parseCharacterClass() throws MalformedPatternException {
967     boolean range = false, skipTest;
968     char clss, deflt, lastclss = Character.MAX_VALUE;
969     int offset, bits, numLength[] = { 0 };
970 
971     offset = __emitNode(OpCode._ANYOF);
972 
973     if(__input._getValue() == '^') {
974       ++__cost;
975       __input._increment();
976       deflt = 0;
977     } else {
978       deflt = 0xffff;
979     }
980 
981     bits = __programSize;
982     for(clss = 0; clss < 16; clss++)
983       __emitCode(deflt);
984 
985     clss = __input._getValue();
986 
987     if(clss == ']' || clss == '-')
988       skipTest = true;
989     else
990       skipTest = false;
991 
992     while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
993     || skipTest) {
994       // It sucks, but we have to make this assignment every time
995       skipTest = false;
996       __input._increment();
997       if(clss == '\\') {
998   clss = __input._postIncrement();
999 
1000  switch(clss){
1001  case 'w':
1002    for(clss = 0; clss < 256; clss++)
1003      if(OpCode._isWordCharacter(clss))
1004        __setCharacterClassBits(__program, bits, deflt, clss);
1005    lastclss = Character.MAX_VALUE;
1006    continue;
1007  case 'W':
1008    for(clss = 0; clss < 256; clss++)
1009      if(!OpCode._isWordCharacter(clss))
1010        __setCharacterClassBits(__program, bits, deflt, clss);
1011    lastclss = Character.MAX_VALUE;
1012    continue;
1013  case 's':
1014    for(clss = 0; clss < 256; clss++)
1015      if(Character.isWhitespace(clss))
1016        __setCharacterClassBits(__program, bits, deflt, clss);
1017    lastclss = Character.MAX_VALUE;
1018    continue;
1019  case 'S':
1020    for(clss = 0; clss < 256; clss++)
1021      if(!Character.isWhitespace(clss))
1022        __setCharacterClassBits(__program, bits, deflt, clss);
1023    lastclss = Character.MAX_VALUE;
1024    continue;
1025  case 'd':
1026    for(clss = '0'; clss <= '9'; clss++)
1027      __setCharacterClassBits(__program, bits, deflt, clss);
1028    lastclss = Character.MAX_VALUE;
1029    continue;
1030  case 'D':
1031    for(clss = 0; clss < '0'; clss++)
1032      __setCharacterClassBits(__program, bits, deflt, clss);
1033    for(clss = (char)('9' + 1); clss < 256; clss++)
1034      __setCharacterClassBits(__program, bits, deflt, clss);
1035    lastclss = Character.MAX_VALUE;
1036    continue;
1037  case 'n':
1038    clss = '\n';
1039    break;
1040  case 'r':
1041    clss = '\r';
1042    break;
1043  case 't':
1044    clss = '\t';
1045    break;
1046  case 'f':
1047    clss = '\f';
1048    break;
1049  case 'b':
1050    clss = '\b';
1051    break;
1052  case 'e':
1053    clss = '\033';
1054    break;
1055  case 'a':
1056    clss = '\007';
1057    break;
1058  case 'x':
1059    clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
1060          numLength);
1061    __input._increment(numLength[0]);
1062    break;
1063  case 'c':
1064    clss = __input._postIncrement();
1065    if(Character.isLowerCase(clss))
1066      clss = Character.toUpperCase(clss);
1067    clss ^= 64;
1068    break;
1069  case '0': case '1': case '2': case '3': case '4':
1070  case '5': case '6': case '7': case '8': case '9':
1071    clss = (char)__parseOctal(__input._array, __input._getOffset() - 1,
1072            3, numLength);
1073    __input._increment(numLength[0] - 1);
1074    break;
1075  }
1076      }
1077
1078      if(range) {
1079  if(lastclss > clss)
1080    throw new MalformedPatternException(
1081       "Invalid [] range in expression.");
1082  range = false;
1083      } else {
1084  lastclss = clss;
1085
1086  if(__input._getValue() == '-' &&
1087     __input._getOffset() + 1 < __input._getLength() &&
1088     __input._getValueRelative(1) != ']') {
1089    __input._increment();
1090    range = true;
1091    continue;
1092  }
1093      }
1094
1095      while(lastclss <= clss) {
1096  __setCharacterClassBits(__program, bits, deflt, lastclss);
1097  if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1098     Character.isUpperCase(lastclss))
1099    __setCharacterClassBits(__program, bits, deflt,
1100         Character.toLowerCase(lastclss));
1101
1102  ++lastclss;
1103      }
1104
1105      lastclss = clss;
1106    }
1107
1108    if(__input._getValue() != ']')
1109      throw new MalformedPatternException("Unmatched [] in expression.");
1110
1111    __getNextChar();
1112
1113    return offset;
1114  }
1115  */
1116
1117  private int __parseUnicodeClass() throws MalformedPatternException {
1118    boolean range = false, skipTest;
1119    char clss, lastclss = Character.MAX_VALUE;
1120
1121    int offset, numLength[] = { 0 };
1122    boolean negFlag[] = { false };
1123    boolean opcodeFlag; /* clss isn't character when this flag true. */
1124
1125    if(__input._getValue() == '^') {
1126      offset = __emitNode(OpCode._NANYOFUN);
1127      __input._increment();
1128    } else {
1129      offset = __emitNode(OpCode._ANYOFUN);
1130    }
1131
1132    clss = __input._getValue();
1133
1134    if(clss == ']' || clss == '-')
1135      skipTest = true;
1136    else
1137      skipTest = false;
1138
1139    while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
1140    || skipTest)
1141      {
1142  // It sucks, but we have to make this assignment every time
1143  skipTest = false;
1144  opcodeFlag = false;
1145  __input._increment();
1146
1147  if(clss == '\\' || clss == '[') {
1148    if(clss == '\\') {
1149      /* character is escaped */
1150      clss = __input._postIncrement();
1151    } else {
1152      /* try POSIX expression */
1153      char posixOpCode = __parsePOSIX(negFlag);
1154      if(posixOpCode != 0){
1155        opcodeFlag = true;
1156        clss = posixOpCode;
1157      }
1158    }
1159    if (opcodeFlag != true) {
1160      switch(clss){
1161      case 'w':
1162        opcodeFlag = true;
1163        clss = OpCode._ALNUM;
1164        lastclss = Character.MAX_VALUE;
1165        break;
1166      case 'W':
1167        opcodeFlag = true;
1168        clss = OpCode._NALNUM;
1169        lastclss = Character.MAX_VALUE;
1170        break;
1171      case 's':
1172        opcodeFlag = true;
1173        clss = OpCode._SPACE;
1174        lastclss = Character.MAX_VALUE;
1175        break;
1176      case 'S':
1177        opcodeFlag = true;
1178        clss = OpCode._NSPACE;
1179        lastclss = Character.MAX_VALUE;
1180        break;
1181      case 'd':
1182        opcodeFlag = true;
1183        clss = OpCode._DIGIT;
1184        lastclss = Character.MAX_VALUE;
1185        break;
1186      case 'D':
1187        opcodeFlag = true;
1188        clss = OpCode._NDIGIT;
1189        lastclss = Character.MAX_VALUE;
1190        break;
1191      case 'n':
1192        clss = '\n';
1193        break;
1194      case 'r':
1195        clss = '\r';
1196        break;
1197      case 't':
1198        clss = '\t';
1199        break;
1200      case 'f':
1201        clss = '\f';
1202        break;
1203      case 'b':
1204        clss = '\b';
1205        break;
1206      case 'e':
1207        clss = '\033';
1208        break;
1209      case 'a':
1210        clss = '\007';
1211        break;
1212      case 'x':
1213        clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
1214              numLength);
1215        __input._increment(numLength[0]);
1216        break;
1217      case 'c':
1218        clss = __input._postIncrement();
1219        if(Character.isLowerCase(clss))
1220    clss = Character.toUpperCase(clss);
1221        clss ^= 64;
1222        break;
1223      case '0': case '1': case '2': case '3': case '4':
1224      case '5': case '6': case '7': case '8': case '9':
1225        clss = 
1226    (char)__parseOctal(__input._array,
1227           __input._getOffset() - 1, 3, numLength);
1228        __input._increment(numLength[0] - 1);
1229        break;
1230      default:
1231        break;
1232      }
1233    }
1234  }
1235
1236  if(range) {
1237    if(lastclss > clss)
1238      throw new MalformedPatternException(
1239          "Invalid [] range in expression.");
1240    range = false;
1241  } else {
1242    lastclss = clss;
1243
1244    if(opcodeFlag == false &&
1245             __input._getValue() == '-' &&
1246       __input._getOffset() + 1 < __input._getLength() &&
1247       __input._getValueRelative(1) != ']') {
1248      __input._increment();
1249      range = true;
1250      continue;
1251    }
1252  }
1253
1254  if(lastclss == clss) {
1255    if(opcodeFlag == true) {
1256      if(negFlag[0] == false)
1257        __emitCode(OpCode._OPCODE);
1258      else 
1259        __emitCode(OpCode._NOPCODE);
1260    } else
1261      __emitCode(OpCode._ONECHAR);
1262
1263    __emitCode(clss);
1264
1265    if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1266       Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
1267      __programSize--;
1268      __emitCode(Character.toLowerCase(clss));
1269    }
1270  }
1271
1272  if(lastclss < clss) {
1273    __emitCode(OpCode._RANGE);
1274    __emitCode(lastclss);
1275    __emitCode(clss);
1276
1277    if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1278       Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
1279      __programSize-=2;
1280      __emitCode(Character.toLowerCase(lastclss));
1281      __emitCode(Character.toLowerCase(clss));
1282
1283    }
1284
1285    lastclss = Character.MAX_VALUE;
1286    range = false;
1287  }
1288    
1289  lastclss = clss;
1290      }
1291
1292    if(__input._getValue() != ']')
1293      throw new MalformedPatternException("Unmatched [] in expression.");
1294
1295    __getNextChar();
1296    __emitCode(OpCode._END);
1297
1298    return offset;
1299  }
1300
1301
1302  /**
1303   * Parse POSIX epxression like [:foo:]. 
1304   * 
1305   * @return OpCode. return 0 when fail parsing POSIX expression.
1306   */
1307  private char __parsePOSIX(boolean negFlag[])
1308    throws MalformedPatternException 
1309  {
1310    int offset = __input._getOffset();
1311    int len = __input._getLength();
1312    int pos = offset;
1313    char value = __input._getValue(pos++);
1314    StringBuffer buf;
1315    Object opcode;
1316
1317    if( value != ':' ) return 0;
1318    if( __input._getValue(pos) == '^' ) {
1319      negFlag[0] = true;
1320      pos++;
1321    } else {
1322      negFlag[0] = false;
1323    }
1324
1325    buf = new StringBuffer();
1326    
1327    try { 
1328      while ( (value = __input._getValue(pos++)) != ':' && pos < len) {
1329  buf.append(value);  
1330      }
1331    } catch (Exception e){
1332      return 0;
1333    }
1334
1335    if( __input._getValue(pos++) != ']'){
1336      return 0;
1337    }
1338
1339    opcode = __hashPOSIX.get(buf.toString());
1340
1341    if( opcode == null )
1342      return 0;
1343
1344    __input._setOffset(pos);
1345    
1346    return ((Character)opcode).charValue();
1347  }
1348
1349
1350  private int __parseBranch(int[] retFlags) throws MalformedPatternException {
1351    boolean nestCheck = false, handleRepetition = false;
1352    int offset, next, min, max, flags[] = { 0 };
1353    char operator, value;
1354
1355    min = 0;
1356    max = Character.MAX_VALUE;
1357    offset = __parseAtom(flags);
1358
1359    if(offset == OpCode._NULL_OFFSET) {
1360      if((flags[0] & __TRYAGAIN) != 0)
1361  retFlags[0] |= __TRYAGAIN;
1362      return OpCode._NULL_OFFSET;
1363    }
1364
1365    operator = __input._getValue();
1366
1367    if(operator == '(' && __input._getValueRelative(1) == '?' &&
1368       __input._getValueRelative(2) == '#') {
1369      while(operator != CharStringPointer._END_OF_STRING && operator != ')')
1370  operator = __input._increment();
1371
1372      if(operator != CharStringPointer._END_OF_STRING) {
1373  __getNextChar();
1374  operator = __input._getValue();
1375      }
1376    }
1377
1378    if(operator == '{' &&
1379       __parseRepetition(__input._array, __input._getOffset())) {
1380      int maxOffset, pos;
1381
1382      next = __input._getOffset() + 1;
1383      pos = maxOffset = __input._getLength();
1384
1385      value = __input._getValue(next);
1386
1387      while(Character.isDigit(value) || value == ',') {
1388  if(value == ',') {
1389    if(pos != maxOffset)
1390      break;
1391    else
1392      pos = next;
1393  }
1394  ++next;
1395  value = __input._getValue(next);
1396      }
1397
1398      if(value == '}') {
1399  int num;
1400  StringBuffer buffer = new StringBuffer(10);
1401
1402  if(pos == maxOffset)
1403    pos = next;
1404  __input._increment();
1405
1406  num = __input._getOffset();
1407  value = __input._getValue(num);
1408
1409  while(Character.isDigit(value)) {
1410    buffer.append(value);
1411    ++num;
1412    value = __input._getValue(num);
1413  }
1414
1415  try {
1416    min = Integer.parseInt(buffer.toString());
1417  } catch(NumberFormatException e) {
1418    throw new MalformedPatternException(
1419   "Unexpected number format exception.  Please report this bug." +
1420     "NumberFormatException message: " + e.getMessage());
1421  }
1422
1423  value = __input._getValue(pos);
1424  if(value == ',')
1425    ++pos;
1426  else
1427    pos = __input._getOffset();
1428
1429  num = pos;
1430  buffer = new StringBuffer(10);
1431
1432  value = __input._getValue(num);
1433
1434  while(Character.isDigit(value)){
1435    buffer.append(value);
1436    ++num;
1437    value = __input._getValue(num);
1438  }
1439
1440  try {
1441    if(num != pos)
1442      max = Integer.parseInt(buffer.toString());
1443  } catch(NumberFormatException e) {
1444    throw new MalformedPatternException(
1445   "Unexpected number format exception.  Please report this bug." +
1446     "NumberFormatException message: " + e.getMessage());
1447  }
1448
1449  if(max == 0 && __input._getValue(pos) != '0')
1450    max = Character.MAX_VALUE;
1451  __input._setOffset(next);
1452  __getNextChar();
1453
1454  nestCheck = true;
1455  handleRepetition = true;
1456      }
1457    }
1458
1459    if(!nestCheck) {
1460      handleRepetition = false;
1461
1462      if(!__isSimpleRepetitionOp(operator)) {
1463  retFlags[0] = flags[0];
1464  return offset;
1465      }
1466
1467      __getNextChar();
1468
1469      retFlags[0] = ((operator != '+') ?
1470      (__WORSTCASE | __SPSTART) : (__WORSTCASE | __NONNULL));
1471
1472      if(operator == '*' && ((flags[0] & __SIMPLE) != 0)) {
1473  __programInsertOperator(OpCode._STAR, offset);
1474  __cost+=4;
1475      } else if(operator == '*') {
1476  min = 0;
1477  handleRepetition = true;
1478      } else if(operator == '+' && (flags[0] & __SIMPLE) != 0) {
1479  __programInsertOperator(OpCode._PLUS, offset);
1480  __cost+=3;
1481      } else if(operator == '+') {
1482  min = 1;
1483  handleRepetition = true;
1484      } else if(operator == '?') {
1485  min = 0;
1486  max = 1;
1487  handleRepetition = true;
1488      }
1489    }
1490
1491    if(handleRepetition) {
1492
1493      // handle repetition
1494      if((flags[0] & __SIMPLE) != 0){
1495  __cost+= ((2 + __cost) / 2);
1496  __programInsertOperator(OpCode._CURLY, offset);
1497      } else {
1498  __cost += (4 + __cost);
1499  __programAddTail(offset, __emitNode(OpCode._WHILEM));
1500  __programInsertOperator(OpCode._CURLYX, offset);
1501  __programAddTail(offset, __emitNode(OpCode._NOTHING));
1502      }
1503
1504      if(min > 0)
1505  retFlags[0] = (__WORSTCASE | __NONNULL);
1506
1507      if(max != 0 && max < min)
1508  throw new MalformedPatternException(
1509       "Invalid interval {" + min + "," + max + "}");
1510
1511      if(__program!= null) {
1512  __program[offset + 2] = (char)min;
1513  __program[offset + 3] = (char)max;
1514      }
1515    }
1516
1517
1518    if(__input._getValue() == '?') {
1519      __getNextChar();
1520      __programInsertOperator(OpCode._MINMOD, offset);
1521      __programAddTail(offset, offset + 2);
1522    }
1523
1524    if(__isComplexRepetitionOp(__input._array, __input._getOffset()))
1525      throw new MalformedPatternException(
1526        "Nested repetitions *?+ in expression");
1527
1528    return offset;
1529  }
1530
1531
1532  private