Source code: org/apache/oro/text/regex/Perl5Compiler.java
1 /*
2 * $Id: Perl5Compiler.java,v 1.21 2003/11/07 20:16:25 dfs Exp $
3 *
4 * ====================================================================
5 * The Apache Software License, Version 1.1
6 *
7 * Copyright (c) 2000 The Apache Software Foundation. All rights
8 * reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 *
22 * 3. The end-user documentation included with the redistribution,
23 * if any, must include the following acknowledgment:
24 * "This product includes software developed by the
25 * Apache Software Foundation (http://www.apache.org/)."
26 * Alternately, this acknowledgment may appear in the software itself,
27 * if and wherever such third-party acknowledgments normally appear.
28 *
29 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
30 * must not be used to endorse or promote products derived from this
31 * software without prior written permission. For written
32 * permission, please contact apache@apache.org.
33 *
34 * 5. Products derived from this software may not be called "Apache"
35 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
36 * name, without prior written permission of the Apache Software Foundation.
37 *
38 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49 * SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This software consists of voluntary contributions made by many
53 * individuals on behalf of the Apache Software Foundation. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58
59 package org.apache.oro.text.regex;
60
61 import java.util.*;
62
63 /**
64 * The Perl5Compiler class is used to create compiled regular expressions
65 * conforming to the Perl5 regular expression syntax. It generates
66 * Perl5Pattern instances upon compilation to be used in conjunction
67 * with a Perl5Matcher instance. Please see the user's guide for more
68 * information about Perl5 regular expressions.
69 * <p>
70 * Perl5Compiler and Perl5Matcher are designed with the intent that
71 * you use a separate instance of each per thread to avoid the overhead
72 * of both synchronization and concurrent access (e.g., a match that takes
73 * a long time in one thread will block the progress of another thread with
74 * a shorter match). If you want to use a single instance of each
75 * in a concurrent program, you must appropriately protect access to
76 * the instances with critical sections. If you want to share Perl5Pattern
77 * instances between concurrently executing instances of Perl5Matcher, you
78 * must compile the patterns with {@link Perl5Compiler#READ_ONLY_MASK}.
79 *
80 * @version @version@
81 * @since 1.0
82 * @see PatternCompiler
83 * @see MalformedPatternException
84 * @see Perl5Pattern
85 * @see Perl5Matcher
86 */
87
88 public final class Perl5Compiler implements PatternCompiler {
89 private static final int __WORSTCASE = 0, __NONNULL = 0x1, __SIMPLE = 0x2,
90 __SPSTART = 0x4, __TRYAGAIN = 0x8;
91
92 private static final char
93 __CASE_INSENSITIVE = 0x0001,
94 __GLOBAL = 0x0002,
95 __KEEP = 0x0004,
96 __MULTILINE = 0x0008,
97 __SINGLELINE = 0x0010,
98 __EXTENDED = 0x0020,
99 __READ_ONLY = 0x8000;
100
101 private static final String __HEX_DIGIT =
102 "0123456789abcdef0123456789ABCDEFx";
103 private CharStringPointer __input;
104 private boolean __sawBackreference;
105 private char[] __modifierFlags = { 0 };
106
107 // IMPORTANT: __numParentheses starts out equal to 1 during compilation.
108 // It is always one greater than the number of parentheses encountered
109 // so far in the regex. That is because it refers to the number of groups
110 // to save, and the entire match is always saved (group 0)
111 private int __numParentheses, __programSize, __cost;
112
113 // When doing the second pass and actually generating code, __programSize
114 // keeps track of the current offset.
115 private char[] __program;
116
/**
 * Lookup table for POSIX character class names.  Keys are the class names
 * as Strings, values are the corresponding OpCode constants boxed as
 * Characters.
 */
private static final HashMap __hashPOSIX = new HashMap();

static {
  // Class names and their opcodes, kept in parallel order.
  final String[] names = {
    "alnum", "word", "alpha", "blank", "cntrl", "digit", "graph",
    "lower", "print", "punct", "space", "upper", "xdigit", "ascii"
  };
  final char[] opcodes = {
    OpCode._ALNUMC, OpCode._ALNUM, OpCode._ALPHA, OpCode._BLANK,
    OpCode._CNTRL, OpCode._DIGIT, OpCode._GRAPH, OpCode._LOWER,
    OpCode._PRINT, OpCode._PUNCT, OpCode._SPACE, OpCode._UPPER,
    OpCode._XDIGIT, OpCode._ASCII
  };

  for (int i = 0; i < names.length; i++) {
    __hashPOSIX.put(names[i], new Character(opcodes[i]));
  }
}
137
138
139 /**
140 * The default mask for the {@link #compile compile} methods.
141 * It is equal to 0.
142 * The default behavior is for a regular expression to be case sensitive
143 * and to not specify if it is multiline or singleline. When MULTILINE_MASK
144 * and SINGLELINE_MASK are not defined, the <b>^</b>, <b>$</b>, and <b>.</b>
145 * metacharacters are
146 * interpreted according to the value of isMultiline() in Perl5Matcher.
147 * The default behavior of Perl5Matcher is to treat the Perl5Pattern
148 * as though MULTILINE_MASK were enabled. If isMultiline() returns false,
149 * then the pattern is treated as though SINGLELINE_MASK were set. However,
150 * compiling a pattern with the MULTILINE_MASK or SINGLELINE_MASK masks
151 * will ALWAYS override whatever behavior is specified by the setMultiline()
152 * in Perl5Matcher.
153 */
154 public static final int DEFAULT_MASK = 0;
155
156 /**
157 * A mask passed as an option to the {@link #compile compile} methods
158 * to indicate a compiled regular expression should be case insensitive.
159 */
160 public static final int CASE_INSENSITIVE_MASK = __CASE_INSENSITIVE;
161
162 /**
163 * A mask passed as an option to the {@link #compile compile} methods
164 * to indicate a compiled regular expression should treat input as having
165 * multiple lines. This option affects the interpretation of
166 * the <b>^</b> and <b>$</b> metacharacters. When this mask is used,
167 * the <b>^</b> metacharacter matches at the beginning of every line,
168 * and the <b>$</b> metacharacter matches at the end of every line.
169 * Additionally the <b> . </b> metacharacter will not match newlines when
170 * an expression is compiled with <b> MULTILINE_MASK </b>, which is its
171 * default behavior.
172 */
173 public static final int MULTILINE_MASK = __MULTILINE;
174
175 /**
176 * A mask passed as an option to the {@link #compile compile} methods
177 * to indicate a compiled regular expression should treat input as being
178 * a single line. This option affects the interpretation of
179 * the <b>^</b> and <b>$</b> metacharacters. When this mask is used,
180 * the <b>^</b> metacharacter matches at the beginning of the input,
181 * and the <b>$</b> metacharacter matches at the end of the input.
182 * The <b>^</b> and <b>$</b> metacharacters will not match at the beginning
183 * and end of lines occurring between the beginning and end of the input.
184 * Additionally, the <b> . </b> metacharacter will match newlines when
185 * an expression is compiled with <b> SINGLELINE_MASK </b>, unlike its
186 * default behavior.
187 */
188 public static final int SINGLELINE_MASK = __SINGLELINE;
189
190 /**
191 * A mask passed as an option to the {@link #compile compile} methods
192 * to indicate a compiled regular expression should be treated as a Perl5
193 * extended pattern (i.e., a pattern using the <b>/x</b> modifier). This
194 * option tells the compiler to ignore whitespace that is not backslashed or
195 * within a character class. It also tells the compiler to treat the
196 * <b>#</b> character as a metacharacter introducing a comment as in
197 * Perl. In other words, the <b>#</b> character will comment out any
198 * text in the regular expression between it and the next newline.
199 * The intent of this option is to allow you to divide your patterns
200 * into more readable parts. It is provided to maintain compatibility
201 * with Perl5 regular expressions, although it will not often
202 * make sense to use it in Java.
203 */
204 public static final int EXTENDED_MASK = __EXTENDED;
205
206 /**
207 * A mask passed as an option to the {@link #compile compile} methods
208 * to indicate that the resulting Perl5Pattern should be treated as a
209 * read only data structure by Perl5Matcher, making it safe to share
210 * a single Perl5Pattern instance among multiple threads without needing
211 * synchronization. Without this option, Perl5Matcher reserves the right
212 * to store heuristic or other information in Perl5Pattern that might
213 * accelerate future matches. When you use this option, Perl5Matcher will
214 * not store or modify any information in a Perl5Pattern. Use this option
215 * when you want to share a Perl5Pattern instance among multiple threads
216 * using different Perl5Matcher instances.
217 */
218 public static final int READ_ONLY_MASK = __READ_ONLY;
219
220 /**
221 * Given a character string, returns a Perl5 expression that interprets
222 * each character of the original string literally. In other words, all
223 * special metacharacters are quoted/escaped. This method is useful for
224 * converting user input meant for literal interpretation into a safe
225 * regular expression representing the literal input.
226 * <p>
227 * In effect, this method is the analog of the Perl5 quotemeta() builtin
228 * method.
229 * <p>
230 * @param expression The expression to convert.
231 * @return A String containing a Perl5 regular expression corresponding to
232 * a literal interpretation of the pattern.
233 */
234 public static final String quotemeta(char[] expression) {
235 int ch;
236 StringBuffer buffer;
237
238 buffer = new StringBuffer(2*expression.length);
239 for(ch = 0; ch < expression.length; ch++) {
240 if(!OpCode._isWordCharacter(expression[ch]))
241 buffer.append('\\');
242 buffer.append(expression[ch]);
243 }
244
245 return buffer.toString();
246 }
247
248 /**
249 * Given a character string, returns a Perl5 expression that interprets
250 * each character of the original string literally. In other words, all
251 * special metacharacters are quoted/escaped. This method is useful for
252 * converting user input meant for literal interpretation into a safe
253 * regular expression representing the literal input.
254 * <p>
255 * In effect, this method is the analog of the Perl5 quotemeta() builtin
256 * method.
257 * <p>
258 * @param pattern The pattern to convert.
259 * @return A String containing a Perl5 regular expression corresponding to
260 * a literal interpretation of the pattern.
261 */
262 public static final String quotemeta(String expression) {
263 return quotemeta(expression.toCharArray());
264 }
265
266 private static boolean __isSimpleRepetitionOp(char ch) {
267 return (ch == '*' || ch == '+' || ch == '?');
268 }
269
270 private static boolean __isComplexRepetitionOp(char[] ch, int offset) {
271 if(offset < ch.length && offset >= 0)
272 return (ch[offset] == '*' || ch[offset] == '+' || ch[offset] == '?'
273 || (ch[offset] == '{' && __parseRepetition(ch, offset)));
274 return false;
275 }
276
277 // determines if {\d+,\d*} is the next part of the string
278 private static boolean __parseRepetition(char[] str, int offset) {
279 if(str[offset] != '{')
280 return false;
281 ++offset;
282
283 if(offset >= str.length || !Character.isDigit(str[offset]))
284 return false;
285
286 while(offset < str.length && Character.isDigit(str[offset]))
287 ++offset;
288
289 if(offset < str.length && str[offset] == ',')
290 ++offset;
291
292 while(offset < str.length && Character.isDigit(str[offset]))
293 ++offset;
294
295 if(offset >= str.length || str[offset] != '}')
296 return false;
297
298 return true;
299 }
300
301 private static int __parseHex(char[] str, int offset, int maxLength,
302 int[] scanned)
303 {
304 int val = 0, index;
305
306 scanned[0] = 0;
307 while(offset < str.length && maxLength-- > 0 &&
308 (index = __HEX_DIGIT.indexOf(str[offset])) != -1) {
309 val <<= 4;
310 val |= (index & 15);
311 ++offset;
312 ++scanned[0];
313 }
314
315 return val;
316 }
317
318 private static int __parseOctal(char[] str, int offset, int maxLength,
319 int[] scanned)
320 {
321 int val = 0;
322
323 scanned[0] = 0;
324 while(offset < str.length &&
325 maxLength > 0 && str[offset] >= '0' && str[offset] <= '7') {
326 val <<= 3;
327 val |= (str[offset] - '0');
328 --maxLength;
329 ++offset;
330 ++scanned[0];
331 }
332
333 return val;
334 }
335
336 private static void __setModifierFlag(char[] flags, char ch) {
337 switch(ch) {
338 case 'i' : flags[0] |= __CASE_INSENSITIVE; return;
339 case 'g' : flags[0] |= __GLOBAL; return;
340 case 'o' : flags[0] |= __KEEP; return;
341 case 'm' : flags[0] |= __MULTILINE; return;
342 case 's' : flags[0] |= __SINGLELINE; return;
343 case 'x' : flags[0] |= __EXTENDED; return;
344 }
345 }
346
347 // Emit a specific character code.
348 private void __emitCode(char code) {
349
350 if(__program != null)
351 __program[__programSize] = code;
352
353 ++__programSize;
354 }
355
356
357 // Emit an operator with no arguments.
358 // Return an offset into the __program array as a pointer to node.
359 private int __emitNode(char operator) {
360 int offset;
361
362 offset = __programSize;
363
364 if(__program == null)
365 __programSize+=2;
366 else {
367 __program[__programSize++] = operator;
368 __program[__programSize++] = OpCode._NULL_POINTER;
369 }
370
371 return offset;
372 }
373
374
375 // Emit an operator with arguments.
376 // Return an offset into the __programarray as a pointer to node.
377 private int __emitArgNode(char operator, char arg) {
378 int offset;
379
380 offset = __programSize;
381
382 if(__program== null)
383 __programSize+=3;
384 else {
385 __program[__programSize++] = operator;
386 __program[__programSize++] = OpCode._NULL_POINTER;
387 __program[__programSize++] = arg;
388 }
389
390 return offset;
391 }
392
393
394 // Insert an operator at a given offset.
395 private void __programInsertOperator(char operator, int operand) {
396 int src, dest, offset;
397
398 offset = (OpCode._opType[operator] == OpCode._CURLY ? 2 : 0);
399
400
401 if(__program== null) {
402 __programSize+=(2 + offset);
403 return;
404 }
405
406 src = __programSize;
407 __programSize+=(2 + offset);
408 dest = __programSize;
409
410 while(src > operand) {
411 --src;
412 --dest;
413 __program[dest] = __program[src];
414 }
415
416 __program[operand++] = operator;
417 __program[operand++] = OpCode._NULL_POINTER;
418
419 while(offset-- > 0)
420 __program[operand++] = OpCode._NULL_POINTER;
421
422 }
423
424
425
426 private void __programAddTail(int current, int value) {
427 int scan, temp, offset;
428 if(__program == null || current == OpCode._NULL_OFFSET)
429 return;
430
431 scan = current;
432
433 while(true) {
434 temp = OpCode._getNext(__program, scan);
435 if(temp == OpCode._NULL_OFFSET)
436 break;
437 scan = temp;
438 }
439
440 if(__program[scan] == OpCode._BACK)
441 offset = scan - value;
442 else
443 offset = value - scan;
444
445 __program[scan + 1] = (char)offset;
446 }
447
448
449 private void __programAddOperatorTail(int current, int value) {
450 if(__program == null || current == OpCode._NULL_OFFSET ||
451 OpCode._opType[__program[current]] != OpCode._BRANCH)
452 return;
453 __programAddTail(OpCode._getNextOperator(current), value);
454 }
455
456
457 private char __getNextChar() {
458 char ret, value;
459
460 ret = __input._postIncrement();
461
462 while(true) {
463 value = __input._getValue();
464
465 if(value == '(' && __input._getValueRelative(1) == '?' &&
466 __input._getValueRelative(2) == '#') {
467 // Skip comments
468 while(value != CharStringPointer._END_OF_STRING && value != ')')
469 value = __input._increment();
470 __input._increment();
471 continue;
472 }
473
474 if((__modifierFlags[0] & __EXTENDED) != 0) {
475 if(Character.isWhitespace(value)) {
476 __input._increment();
477 continue;
478 } else if(value == '#') {
479 while(value != CharStringPointer._END_OF_STRING && value != '\n')
480 value = __input._increment();
481 __input._increment();
482 continue;
483 }
484 }
485
486 return ret;
487 }
488
489 }
490
491
// Parses one alternative, i.e. everything up to the next '|', ')' or the
// end of the input.  Emits an OpCode._BRANCH node, then repeatedly calls
// __parseBranch and links each result to the previous one with
// __programAddTail.  If nothing was parsed an OpCode._NOTHING node is
// emitted after the branch node.
//
// retFlags[0] is reset to __WORSTCASE on entry; note that the same array is
// also handed to __parseBranch.
// Returns the offset of the branch node, or OpCode._NULL_OFFSET when
// __parseBranch returns the null offset.
491 private int __parseAlternation(int[] retFlags)
492 throws MalformedPatternException
493 {
494 int chain, offset, latest;
495 int flags = 0;
496 char value;
497
498 retFlags[0] = __WORSTCASE;
499
500 offset = __emitNode(OpCode._BRANCH);
501
502 chain = OpCode._NULL_OFFSET;
503
// Step back one position and go forward again through __getNextChar so
// that its comment/whitespace skipping is applied at the current position.
504 if(__input._getOffset() == 0) {
505 __input._setOffset(-1);
506 __getNextChar();
507 } else {
508 __input._decrement();
509 __getNextChar();
510 }
511
512 value = __input._getValue();
513
514 while(value != CharStringPointer._END_OF_STRING &&
515 value != '|' && value != ')') {
// NOTE(review): the local `flags` starts at 0 and nothing in this method
// ever sets a bit in it (retFlags, not flags, is passed to __parseBranch).
// As written, the __TRYAGAIN test below can never succeed and the two
// `retFlags[0] |= (flags & ...)` statements add nothing -- confirm intent.
516 flags &= ~__TRYAGAIN;
517 latest = __parseBranch(retFlags);
518
519 if(latest == OpCode._NULL_OFFSET) {
520 if((flags & __TRYAGAIN) != 0){
521 value = __input._getValue();
522 continue;
523 }
524 return OpCode._NULL_OFFSET;
525 }
526
527 retFlags[0] |= (flags & __NONNULL);
528
529 if(chain == OpCode._NULL_OFFSET)
530 retFlags[0] |= (flags & __SPSTART);
531 else {
// Every piece after the first adds to the cost and is chained onto the
// previous piece.
532 ++__cost;
533 __programAddTail(chain, latest);
534 }
535 chain = latest;
536 value = __input._getValue();
537 }
538
539 // If loop was never entered.
540 if(chain == OpCode._NULL_OFFSET)
541 __emitNode(OpCode._NOTHING);
542
543 return offset;
544 }
546
547
// Parses one atom at the current input position and emits the program
// node(s) for it.
//
// Handles the anchors ^ and $, '.', character classes (delegated to
// __parseUnicodeClass), parenthesised groups (delegated to
// __parseExpression), backslash escapes and backreferences.  Anything else
// is collected into an OpCode._EXACTLY literal run by the doDefault section
// in the second half of the method.
//
// retFlags[0] is reset to __WORSTCASE and then OR-ed with __NONNULL,
// __SIMPLE, __SPSTART or __TRYAGAIN as the individual cases dictate.
// Returns the offset of the emitted node, or OpCode._NULL_OFFSET when no
// node was emitted (see the '(' and '|' / ')' cases).
// Throws MalformedPatternException for a misplaced '|' or ')', a quantifier
// with nothing in front of it, an invalid backreference or a trailing
// backslash.
548 private int __parseAtom(int[] retFlags) throws MalformedPatternException {
549 boolean doDefault;
550 char value;
551 int offset, flags[] = { 0 };
552
553
554 retFlags[0] = __WORSTCASE;
555 doDefault = false;
556 offset = OpCode._NULL_OFFSET;
557
// The loop exists only so that a case can re-dispatch on the current
// character with "continue tryAgain"; every other path leaves it.
558 tryAgain:
559 while(true) {
560
561 value = __input._getValue();
562
563 switch(value) {
564 case '^' :
565 __getNextChar();
566 // The order here is important in order to support /ms.
567 // /m takes precedence over /s for ^ and $, but not for .
568 if((__modifierFlags[0] & __MULTILINE) != 0)
569 offset = __emitNode(OpCode._MBOL);
570 else if((__modifierFlags[0] & __SINGLELINE) != 0)
571 offset = __emitNode(OpCode._SBOL);
572 else
573 offset = __emitNode(OpCode._BOL);
574 break tryAgain;
575
576 case '$':
577 __getNextChar();
578 // The order here is important in order to support /ms.
579 // /m takes precedence over /s for ^ and $, but not for .
580 if((__modifierFlags[0] & __MULTILINE) != 0)
581 offset = __emitNode(OpCode._MEOL);
582 else if((__modifierFlags[0] & __SINGLELINE) != 0)
583 offset = __emitNode(OpCode._SEOL);
584 else
585 offset = __emitNode(OpCode._EOL);
586 break tryAgain;
587
588 case '.':
589 __getNextChar();
590 // The order here is important in order to support /ms.
591 // /m takes precedence over /s for ^ and $, but not for .
592 if((__modifierFlags[0] & __SINGLELINE) != 0)
593 offset = __emitNode(OpCode._SANY);
594 else
595 offset = __emitNode(OpCode._ANY);
596 ++__cost;
597 retFlags[0] |= (__NONNULL | __SIMPLE);
598 break tryAgain;
599
600 case '[':
// Uses a plain _increment() here instead of __getNextChar() -- presumably
// so that nothing inside the class is skipped as whitespace or comment;
// TODO confirm against __parseUnicodeClass.
601 __input._increment();
602 offset = __parseUnicodeClass();
603 retFlags[0] |= (__NONNULL | __SIMPLE);
604 break tryAgain;
605
606 case '(':
607 __getNextChar();
608 offset = __parseExpression(true, flags);
// A null offset combined with __TRYAGAIN in flags means no node was
// produced for the group; dispatch again on whatever follows it.
609 if(offset == OpCode._NULL_OFFSET) {
610 if((flags[0] & __TRYAGAIN) != 0)
611 continue tryAgain;
612 return OpCode._NULL_OFFSET;
613 }
614 retFlags[0] |= (flags[0] & (__NONNULL | __SPSTART));
615 break tryAgain;
616
617 case '|':
618 case ')':
// Only acceptable when reached after a group that asked for a retry; the
// request is then passed on to the caller through retFlags.
619 if((flags[0] & __TRYAGAIN) != 0) {
620 retFlags[0] |= __TRYAGAIN;
621 return OpCode._NULL_OFFSET;
622 }
623
624 throw new MalformedPatternException("Error in expression at " +
625 __input._toString(__input._getOffset()));
626 //break tryAgain;
627
628 case '?':
629 case '+':
630 case '*':
631 throw new MalformedPatternException(
632 "?+* follows nothing in expression");
633 //break tryAgain;
634
635 case '\\':
// Look at the character after the backslash.  The escapes handled right
// here emit a single node; all others are left to the literal-run code
// below by setting doDefault.
636 value = __input._increment();
637
638 switch(value) {
639 case 'A' :
640 offset = __emitNode(OpCode._SBOL);
641 retFlags[0] |= __SIMPLE;
642 __getNextChar();
643 break;
644 case 'G':
645 offset = __emitNode(OpCode._GBOL);
646 retFlags[0] |= __SIMPLE;
647 __getNextChar();
648 break;
649 case 'Z':
650 offset = __emitNode(OpCode._SEOL);
651 retFlags[0] |= __SIMPLE;
652 __getNextChar();
653 break;
654 case 'w':
655 offset = __emitNode(OpCode._ALNUM);
656 retFlags[0] |= (__NONNULL | __SIMPLE);
657 __getNextChar();
658 break;
659 case 'W':
660 offset = __emitNode(OpCode._NALNUM);
661 retFlags[0] |= (__NONNULL | __SIMPLE);
662 __getNextChar();
663 break;
664 case 'b':
665 offset = __emitNode(OpCode._BOUND);
666 retFlags[0] |= __SIMPLE;
667 __getNextChar();
668 break;
669 case 'B':
670 offset = __emitNode(OpCode._NBOUND);
671 retFlags[0] |= __SIMPLE;
672 __getNextChar();
673 break;
674 case 's':
675 offset = __emitNode(OpCode._SPACE);
676 retFlags[0] |= (__NONNULL | __SIMPLE);
677 __getNextChar();
678 break;
679 case 'S':
680 offset = __emitNode(OpCode._NSPACE);
681 retFlags[0] |= (__NONNULL | __SIMPLE);
682 __getNextChar();
683 break;
684 case 'd':
685 offset = __emitNode(OpCode._DIGIT);
686 retFlags[0] |= (__NONNULL | __SIMPLE);
687 __getNextChar();
688 break;
689 case 'D':
690 offset = __emitNode(OpCode._NDIGIT);
691 retFlags[0] |= (__NONNULL | __SIMPLE);
692 __getNextChar();
693 break;
694 case 'n': case 'r': case 't': case 'f': case 'e': case 'a': case 'x':
695 case 'c': case '0':
696 doDefault = true;
697 break tryAgain;
698 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
699 case '8': case '9':
700 int num;
701 StringBuffer buffer = new StringBuffer(10);
702
// Collect every decimal digit that follows the backslash.
703 num = 0;
704 value = __input._getValueRelative(num);
705
706 while(Character.isDigit(value)) {
707 buffer.append(value);
708 ++num;
709 value = __input._getValueRelative(num);
710 }
711
712 try {
713 num = Integer.parseInt(buffer.toString());
714 } catch(NumberFormatException e) {
715 throw new MalformedPatternException(
716 "Unexpected number format exception. Please report this bug." +
717 "NumberFormatException message: " + e.getMessage());
718 }
719
// A multi-digit number that does not name an already opened group is not
// a backreference; it is handed to the literal-run code below, which
// reads it as an octal escape.
720 if(num > 9 && num >= __numParentheses) {
721 doDefault = true;
722 break tryAgain;
723 } else {
724 // A backreference may only occur AFTER its group
725 if(num >= __numParentheses)
726 throw new MalformedPatternException("Invalid backreference: \\" +
727 num);
728 __sawBackreference = true;
729 offset = __emitArgNode(OpCode._REF, (char)num);
730 retFlags[0] |= __NONNULL;
731
// Move past the digits, then step back one so that __getNextChar() lands
// on the first character after the number.
732 value = __input._getValue();
733 while(Character.isDigit(value))
734 value = __input._increment();
735
736 __input._decrement();
737 __getNextChar();
738 }
739 break;
740 case '\0':
741 case CharStringPointer._END_OF_STRING:
742 if(__input._isAtEnd())
743 throw new
744 MalformedPatternException("Trailing \\ in expression.");
745
746 // fall through to default
747 default:
748 doDefault = true;
749 break tryAgain;
750 }
751 break tryAgain;
752
753 case '#':
754 // skip over comments
755 if((__modifierFlags[0] & __EXTENDED) != 0) {
756 while(!__input._isAtEnd() && __input._getValue() != '\n')
757 __input._increment();
758 if(!__input._isAtEnd())
759 continue tryAgain;
760 }
761 // fall through to default
762 default:
763 __input._increment();
764 doDefault = true;
765 break tryAgain;
766 }// end master switch
767 } // end tryAgain
768
769
// Literal run: emit an _EXACTLY node followed by the characters of the run.
770 if(doDefault) {
771 char ender;
772 int length, pOffset, maxOffset, lastOffset, numLength[];
773
774 offset = __emitNode(OpCode._EXACTLY);
775 // Not sure that it's ok to use 0 to mark end.
776 //__emitCode((char)0);
// Placeholder slot; the run length is stored after the loop through
// OpCode._getOperand(offset) -- presumably this same slot, TODO confirm.
777 __emitCode((char)CharStringPointer._END_OF_STRING);
778
// Every path that sets doDefault has already moved the input one position
// past the character that triggered it (the backslash, or the first
// literal character), hence the "- 1" for the starting offset.  A run
// holds at most 127 characters.
779 forLoop:
780 for(length = 0, pOffset = __input._getOffset() - 1,
781 maxOffset = __input._getLength();
782 length < 127 && pOffset < maxOffset; ++length) {
783
784 lastOffset = pOffset;
785 value = __input._getValue(pOffset);
786
787 switch(value) {
// Metacharacters end the run; they are handled by the next __parseAtom call.
788 case '^': case '$': case '.': case '[': case '(': case ')':
789 case '|':
790 break forLoop;
791 case '\\':
792 value = __input._getValue(++pOffset);
793
794 switch(value) {
// Escapes that need a node of their own: back up onto the backslash and
// end the run.
795 case 'A': case 'G': case 'Z': case 'w': case 'W': case 'b':
796 case 'B': case 's': case 'S': case 'd': case 'D':
797 --pOffset;
798 break forLoop;
799 case 'n':
800 ender = '\n';
801 ++pOffset;
802 break;
803 case 'r':
804 ender = '\r';
805 ++pOffset;
806 break;
807 case 't':
808 ender = '\t';
809 ++pOffset;
810 break;
811 case 'f':
812 ender = '\f';
813 ++pOffset;
814 break;
815 case 'e':
816 ender = '\033';
817 ++pOffset;
818 break;
819 case 'a':
820 ender = '\007';
821 ++pOffset;
822 break;
823 case 'x':
// \x followed by at most two hexadecimal digits.
824 numLength = new int[1];
825 ender = (char)__parseHex(__input._array, ++pOffset, 2, numLength);
826 pOffset+=numLength[0];
827 break;
828 case 'c':
// \cX: upper-case the following character and flip bit 6 (value 64).
829 ++pOffset;
830 ender = __input._getValue(pOffset++);
831 if(Character.isLowerCase(ender))
832 ender = Character.toUpperCase(ender);
833 ender ^= 64;
834 break;
835 case '0': case '1': case '2': case'3': case '4': case '5':
836 case '6': case '7': case '8': case '9':
// A leading 0 always means octal.  Otherwise a number of two or more
// digits is octal only if it is not smaller than __numParentheses;
// anything else is a backreference and ends the run.
837 boolean doOctal = false;
838 value = __input._getValue(pOffset);
839
840 if(value == '0')
841 doOctal = true;
842 value = __input._getValue(pOffset + 1);
843
844 if(Character.isDigit(value)) {
845 int num;
846 StringBuffer buffer = new StringBuffer(10);
847
848 num = pOffset;
849 value = __input._getValue(num);
850
851 while(Character.isDigit(value)){
852 buffer.append(value);
853 ++num;
854 value = __input._getValue(num);
855 }
856
857 try {
858 num = Integer.parseInt(buffer.toString());
859 } catch(NumberFormatException e) {
860 throw new MalformedPatternException(
861 "Unexpected number format exception. Please report this bug." +
862 "NumberFormatException message: " + e.getMessage());
863 }
864
865 if(!doOctal)
866 doOctal = (num >= __numParentheses);
867 }
868
869 if(doOctal) {
870 numLength = new int[1];
871 ender = (char)__parseOctal(__input._array, pOffset, 3, numLength);
872 pOffset+=numLength[0];
873 } else {
874 --pOffset;
875 break forLoop;
876 }
877 break;
878 case CharStringPointer._END_OF_STRING:
879 case '\0':
880 if(pOffset >= maxOffset)
881 throw new
882 MalformedPatternException("Trailing \\ in expression.");
883 // fall through to default
884 default:
// Any other escaped character stands for itself.
885 ender = __input._getValue(pOffset++);
886 break;
887 } // end backslash switch
888 break;
889 case '#':
890 if((__modifierFlags[0] & __EXTENDED) != 0) {
891 while(pOffset < maxOffset && __input._getValue(pOffset) != '\n')
892 ++pOffset;
893 }
894 // fall through to whitespace handling
895 case ' ': case '\t': case '\n': case '\r': case '\f': case '\013':
// In extended mode the character is skipped; --length cancels the ++length
// of the for statement so the skipped character is not counted.
896 if((__modifierFlags[0] & __EXTENDED) != 0) {
897 ++pOffset;
898 --length;
899 continue;
900 }
901 // fall through to default
902 default:
903 ender = __input._getValue(pOffset++);
904 break;
905
906 } // end master switch
907
// Case-insensitive patterns store upper-case literals in lower case.
908 if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
909 Character.isUpperCase(ender))
910 ender = Character.toLowerCase(ender);
911
// A quantifier that follows must apply to this character alone.  If the
// run already holds characters, back up so that this character starts an
// atom of its own; otherwise emit it as a one-character run.
912 if(pOffset < maxOffset && __isComplexRepetitionOp(__input._array, pOffset)) {
913 if(length > 0)
914 pOffset = lastOffset;
915 else {
916 ++length;
917 __emitCode(ender);
918 }
919 break;
920 }
921
922 __emitCode(ender);
923
924
925 } // end for loop
926
927
// Reposition the input at pOffset by way of __getNextChar(), so that its
// comment and whitespace skipping is applied.
928 __input._setOffset(pOffset - 1);
929 __getNextChar();
930
931 if(length < 0)
932 throw new MalformedPatternException(
933 "Unexpected compilation failure. Please report this bug!");
934 if(length > 0)
935 retFlags[0] |= __NONNULL;
936 if(length == 1)
937 retFlags[0] |= __SIMPLE;
938 if(__program!= null)
939 __program[OpCode._getOperand(offset)] = (char)length;
940 //__emitCode('\0'); // debug
941 __emitCode(CharStringPointer._END_OF_STRING);
942 }
943
944 return offset;
945 }
946
947 // These are the original 8-bit character class handling methods.
948 // We don't want to delete them just yet only to have to dig it out
949 // of revision control later.
950 /*
951 // Set the bits in a character class. Only recognizes ascii.
952 private void __setCharacterClassBits(char[] bits, int offset, char deflt,
953 char ch)
954 {
955 if(__program== null || ch >= 256)
956 return;
957 ch &= 0xffff;
958
959 if(deflt == 0) {
960 bits[offset + (ch >> 4)] |= (1 << (ch & 0xf));
961 } else {
962 bits[offset + (ch >> 4)] &= ~(1 << (ch & 0xf));
963 }
964 }
965
966 private int __parseCharacterClass() throws MalformedPatternException {
967 boolean range = false, skipTest;
968 char clss, deflt, lastclss = Character.MAX_VALUE;
969 int offset, bits, numLength[] = { 0 };
970
971 offset = __emitNode(OpCode._ANYOF);
972
973 if(__input._getValue() == '^') {
974 ++__cost;
975 __input._increment();
976 deflt = 0;
977 } else {
978 deflt = 0xffff;
979 }
980
981 bits = __programSize;
982 for(clss = 0; clss < 16; clss++)
983 __emitCode(deflt);
984
985 clss = __input._getValue();
986
987 if(clss == ']' || clss == '-')
988 skipTest = true;
989 else
990 skipTest = false;
991
992 while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
993 || skipTest) {
994 // It sucks, but we have to make this assignment every time
995 skipTest = false;
996 __input._increment();
997 if(clss == '\\') {
998 clss = __input._postIncrement();
999
1000 switch(clss){
1001 case 'w':
1002 for(clss = 0; clss < 256; clss++)
1003 if(OpCode._isWordCharacter(clss))
1004 __setCharacterClassBits(__program, bits, deflt, clss);
1005 lastclss = Character.MAX_VALUE;
1006 continue;
1007 case 'W':
1008 for(clss = 0; clss < 256; clss++)
1009 if(!OpCode._isWordCharacter(clss))
1010 __setCharacterClassBits(__program, bits, deflt, clss);
1011 lastclss = Character.MAX_VALUE;
1012 continue;
1013 case 's':
1014 for(clss = 0; clss < 256; clss++)
1015 if(Character.isWhitespace(clss))
1016 __setCharacterClassBits(__program, bits, deflt, clss);
1017 lastclss = Character.MAX_VALUE;
1018 continue;
1019 case 'S':
1020 for(clss = 0; clss < 256; clss++)
1021 if(!Character.isWhitespace(clss))
1022 __setCharacterClassBits(__program, bits, deflt, clss);
1023 lastclss = Character.MAX_VALUE;
1024 continue;
1025 case 'd':
1026 for(clss = '0'; clss <= '9'; clss++)
1027 __setCharacterClassBits(__program, bits, deflt, clss);
1028 lastclss = Character.MAX_VALUE;
1029 continue;
1030 case 'D':
1031 for(clss = 0; clss < '0'; clss++)
1032 __setCharacterClassBits(__program, bits, deflt, clss);
1033 for(clss = (char)('9' + 1); clss < 256; clss++)
1034 __setCharacterClassBits(__program, bits, deflt, clss);
1035 lastclss = Character.MAX_VALUE;
1036 continue;
1037 case 'n':
1038 clss = '\n';
1039 break;
1040 case 'r':
1041 clss = '\r';
1042 break;
1043 case 't':
1044 clss = '\t';
1045 break;
1046 case 'f':
1047 clss = '\f';
1048 break;
1049 case 'b':
1050 clss = '\b';
1051 break;
1052 case 'e':
1053 clss = '\033';
1054 break;
1055 case 'a':
1056 clss = '\007';
1057 break;
1058 case 'x':
1059 clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
1060 numLength);
1061 __input._increment(numLength[0]);
1062 break;
1063 case 'c':
1064 clss = __input._postIncrement();
1065 if(Character.isLowerCase(clss))
1066 clss = Character.toUpperCase(clss);
1067 clss ^= 64;
1068 break;
1069 case '0': case '1': case '2': case '3': case '4':
1070 case '5': case '6': case '7': case '8': case '9':
1071 clss = (char)__parseOctal(__input._array, __input._getOffset() - 1,
1072 3, numLength);
1073 __input._increment(numLength[0] - 1);
1074 break;
1075 }
1076 }
1077
1078 if(range) {
1079 if(lastclss > clss)
1080 throw new MalformedPatternException(
1081 "Invalid [] range in expression.");
1082 range = false;
1083 } else {
1084 lastclss = clss;
1085
1086 if(__input._getValue() == '-' &&
1087 __input._getOffset() + 1 < __input._getLength() &&
1088 __input._getValueRelative(1) != ']') {
1089 __input._increment();
1090 range = true;
1091 continue;
1092 }
1093 }
1094
1095 while(lastclss <= clss) {
1096 __setCharacterClassBits(__program, bits, deflt, lastclss);
1097 if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1098 Character.isUpperCase(lastclss))
1099 __setCharacterClassBits(__program, bits, deflt,
1100 Character.toLowerCase(lastclss));
1101
1102 ++lastclss;
1103 }
1104
1105 lastclss = clss;
1106 }
1107
1108 if(__input._getValue() != ']')
1109 throw new MalformedPatternException("Unmatched [] in expression.");
1110
1111 __getNextChar();
1112
1113 return offset;
1114 }
1115 */
1116
1117 private int __parseUnicodeClass() throws MalformedPatternException {
1118 boolean range = false, skipTest;
1119 char clss, lastclss = Character.MAX_VALUE;
1120
1121 int offset, numLength[] = { 0 };
1122 boolean negFlag[] = { false };
1123 boolean opcodeFlag; /* clss isn't character when this flag true. */
1124
1125 if(__input._getValue() == '^') {
1126 offset = __emitNode(OpCode._NANYOFUN);
1127 __input._increment();
1128 } else {
1129 offset = __emitNode(OpCode._ANYOFUN);
1130 }
1131
1132 clss = __input._getValue();
1133
1134 if(clss == ']' || clss == '-')
1135 skipTest = true;
1136 else
1137 skipTest = false;
1138
1139 while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
1140 || skipTest)
1141 {
1142 // It sucks, but we have to make this assignment every time
1143 skipTest = false;
1144 opcodeFlag = false;
1145 __input._increment();
1146
1147 if(clss == '\\' || clss == '[') {
1148 if(clss == '\\') {
1149 /* character is escaped */
1150 clss = __input._postIncrement();
1151 } else {
1152 /* try POSIX expression */
1153 char posixOpCode = __parsePOSIX(negFlag);
1154 if(posixOpCode != 0){
1155 opcodeFlag = true;
1156 clss = posixOpCode;
1157 }
1158 }
1159 if (opcodeFlag != true) {
1160 switch(clss){
1161 case 'w':
1162 opcodeFlag = true;
1163 clss = OpCode._ALNUM;
1164 lastclss = Character.MAX_VALUE;
1165 break;
1166 case 'W':
1167 opcodeFlag = true;
1168 clss = OpCode._NALNUM;
1169 lastclss = Character.MAX_VALUE;
1170 break;
1171 case 's':
1172 opcodeFlag = true;
1173 clss = OpCode._SPACE;
1174 lastclss = Character.MAX_VALUE;
1175 break;
1176 case 'S':
1177 opcodeFlag = true;
1178 clss = OpCode._NSPACE;
1179 lastclss = Character.MAX_VALUE;
1180 break;
1181 case 'd':
1182 opcodeFlag = true;
1183 clss = OpCode._DIGIT;
1184 lastclss = Character.MAX_VALUE;
1185 break;
1186 case 'D':
1187 opcodeFlag = true;
1188 clss = OpCode._NDIGIT;
1189 lastclss = Character.MAX_VALUE;
1190 break;
1191 case 'n':
1192 clss = '\n';
1193 break;
1194 case 'r':
1195 clss = '\r';
1196 break;
1197 case 't':
1198 clss = '\t';
1199 break;
1200 case 'f':
1201 clss = '\f';
1202 break;
1203 case 'b':
1204 clss = '\b';
1205 break;
1206 case 'e':
1207 clss = '\033';
1208 break;
1209 case 'a':
1210 clss = '\007';
1211 break;
1212 case 'x':
1213 clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
1214 numLength);
1215 __input._increment(numLength[0]);
1216 break;
1217 case 'c':
1218 clss = __input._postIncrement();
1219 if(Character.isLowerCase(clss))
1220 clss = Character.toUpperCase(clss);
1221 clss ^= 64;
1222 break;
1223 case '0': case '1': case '2': case '3': case '4':
1224 case '5': case '6': case '7': case '8': case '9':
1225 clss =
1226 (char)__parseOctal(__input._array,
1227 __input._getOffset() - 1, 3, numLength);
1228 __input._increment(numLength[0] - 1);
1229 break;
1230 default:
1231 break;
1232 }
1233 }
1234 }
1235
1236 if(range) {
1237 if(lastclss > clss)
1238 throw new MalformedPatternException(
1239 "Invalid [] range in expression.");
1240 range = false;
1241 } else {
1242 lastclss = clss;
1243
1244 if(opcodeFlag == false &&
1245 __input._getValue() == '-' &&
1246 __input._getOffset() + 1 < __input._getLength() &&
1247 __input._getValueRelative(1) != ']') {
1248 __input._increment();
1249 range = true;
1250 continue;
1251 }
1252 }
1253
1254 if(lastclss == clss) {
1255 if(opcodeFlag == true) {
1256 if(negFlag[0] == false)
1257 __emitCode(OpCode._OPCODE);
1258 else
1259 __emitCode(OpCode._NOPCODE);
1260 } else
1261 __emitCode(OpCode._ONECHAR);
1262
1263 __emitCode(clss);
1264
1265 if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1266 Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
1267 __programSize--;
1268 __emitCode(Character.toLowerCase(clss));
1269 }
1270 }
1271
1272 if(lastclss < clss) {
1273 __emitCode(OpCode._RANGE);
1274 __emitCode(lastclss);
1275 __emitCode(clss);
1276
1277 if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
1278 Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
1279 __programSize-=2;
1280 __emitCode(Character.toLowerCase(lastclss));
1281 __emitCode(Character.toLowerCase(clss));
1282
1283 }
1284
1285 lastclss = Character.MAX_VALUE;
1286 range = false;
1287 }
1288
1289 lastclss = clss;
1290 }
1291
1292 if(__input._getValue() != ']')
1293 throw new MalformedPatternException("Unmatched [] in expression.");
1294
1295 __getNextChar();
1296 __emitCode(OpCode._END);
1297
1298 return offset;
1299 }
1300
1301
1302 /**
1303 * Parse POSIX epxression like [:foo:].
1304 *
1305 * @return OpCode. return 0 when fail parsing POSIX expression.
1306 */
1307 private char __parsePOSIX(boolean negFlag[])
1308 throws MalformedPatternException
1309 {
1310 int offset = __input._getOffset();
1311 int len = __input._getLength();
1312 int pos = offset;
1313 char value = __input._getValue(pos++);
1314 StringBuffer buf;
1315 Object opcode;
1316
1317 if( value != ':' ) return 0;
1318 if( __input._getValue(pos) == '^' ) {
1319 negFlag[0] = true;
1320 pos++;
1321 } else {
1322 negFlag[0] = false;
1323 }
1324
1325 buf = new StringBuffer();
1326
1327 try {
1328 while ( (value = __input._getValue(pos++)) != ':' && pos < len) {
1329 buf.append(value);
1330 }
1331 } catch (Exception e){
1332 return 0;
1333 }
1334
1335 if( __input._getValue(pos++) != ']'){
1336 return 0;
1337 }
1338
1339 opcode = __hashPOSIX.get(buf.toString());
1340
1341 if( opcode == null )
1342 return 0;
1343
1344 __input._setOffset(pos);
1345
1346 return ((Character)opcode).charValue();
1347 }
1348
1349
1350 private int __parseBranch(int[] retFlags) throws MalformedPatternException {
1351 boolean nestCheck = false, handleRepetition = false;
1352 int offset, next, min, max, flags[] = { 0 };
1353 char operator, value;
1354
1355 min = 0;
1356 max = Character.MAX_VALUE;
1357 offset = __parseAtom(flags);
1358
1359 if(offset == OpCode._NULL_OFFSET) {
1360 if((flags[0] & __TRYAGAIN) != 0)
1361 retFlags[0] |= __TRYAGAIN;
1362 return OpCode._NULL_OFFSET;
1363 }
1364
1365 operator = __input._getValue();
1366
1367 if(operator == '(' && __input._getValueRelative(1) == '?' &&
1368 __input._getValueRelative(2) == '#') {
1369 while(operator != CharStringPointer._END_OF_STRING && operator != ')')
1370 operator = __input._increment();
1371
1372 if(operator != CharStringPointer._END_OF_STRING) {
1373 __getNextChar();
1374 operator = __input._getValue();
1375 }
1376 }
1377
1378 if(operator == '{' &&
1379 __parseRepetition(__input._array, __input._getOffset())) {
1380 int maxOffset, pos;
1381
1382 next = __input._getOffset() + 1;
1383 pos = maxOffset = __input._getLength();
1384
1385 value = __input._getValue(next);
1386
1387 while(Character.isDigit(value) || value == ',') {
1388 if(value == ',') {
1389 if(pos != maxOffset)
1390 break;
1391 else
1392 pos = next;
1393 }
1394 ++next;
1395 value = __input._getValue(next);
1396 }
1397
1398 if(value == '}') {
1399 int num;
1400 StringBuffer buffer = new StringBuffer(10);
1401
1402 if(pos == maxOffset)
1403 pos = next;
1404 __input._increment();
1405
1406 num = __input._getOffset();
1407 value = __input._getValue(num);
1408
1409 while(Character.isDigit(value)) {
1410 buffer.append(value);
1411 ++num;
1412 value = __input._getValue(num);
1413 }
1414
1415 try {
1416 min = Integer.parseInt(buffer.toString());
1417 } catch(NumberFormatException e) {
1418 throw new MalformedPatternException(
1419 "Unexpected number format exception. Please report this bug." +
1420 "NumberFormatException message: " + e.getMessage());
1421 }
1422
1423 value = __input._getValue(pos);
1424 if(value == ',')
1425 ++pos;
1426 else
1427 pos = __input._getOffset();
1428
1429 num = pos;
1430 buffer = new StringBuffer(10);
1431
1432 value = __input._getValue(num);
1433
1434 while(Character.isDigit(value)){
1435 buffer.append(value);
1436 ++num;
1437 value = __input._getValue(num);
1438 }
1439
1440 try {
1441 if(num != pos)
1442 max = Integer.parseInt(buffer.toString());
1443 } catch(NumberFormatException e) {
1444 throw new MalformedPatternException(
1445 "Unexpected number format exception. Please report this bug." +
1446 "NumberFormatException message: " + e.getMessage());
1447 }
1448
1449 if(max == 0 && __input._getValue(pos) != '0')
1450 max = Character.MAX_VALUE;
1451 __input._setOffset(next);
1452 __getNextChar();
1453
1454 nestCheck = true;
1455 handleRepetition = true;
1456 }
1457 }
1458
1459 if(!nestCheck) {
1460 handleRepetition = false;
1461
1462 if(!__isSimpleRepetitionOp(operator)) {
1463 retFlags[0] = flags[0];
1464 return offset;
1465 }
1466
1467 __getNextChar();
1468
1469 retFlags[0] = ((operator != '+') ?
1470 (__WORSTCASE | __SPSTART) : (__WORSTCASE | __NONNULL));
1471
1472 if(operator == '*' && ((flags[0] & __SIMPLE) != 0)) {
1473 __programInsertOperator(OpCode._STAR, offset);
1474 __cost+=4;
1475 } else if(operator == '*') {
1476 min = 0;
1477 handleRepetition = true;
1478 } else if(operator == '+' && (flags[0] & __SIMPLE) != 0) {
1479 __programInsertOperator(OpCode._PLUS, offset);
1480 __cost+=3;
1481 } else if(operator == '+') {
1482 min = 1;
1483 handleRepetition = true;
1484 } else if(operator == '?') {
1485 min = 0;
1486 max = 1;
1487 handleRepetition = true;
1488 }
1489 }
1490
1491 if(handleRepetition) {
1492
1493 // handle repetition
1494 if((flags[0] & __SIMPLE) != 0){
1495 __cost+= ((2 + __cost) / 2);
1496 __programInsertOperator(OpCode._CURLY, offset);
1497 } else {
1498 __cost += (4 + __cost);
1499 __programAddTail(offset, __emitNode(OpCode._WHILEM));
1500 __programInsertOperator(OpCode._CURLYX, offset);
1501 __programAddTail(offset, __emitNode(OpCode._NOTHING));
1502 }
1503
1504 if(min > 0)
1505 retFlags[0] = (__WORSTCASE | __NONNULL);
1506
1507 if(max != 0 && max < min)
1508 throw new MalformedPatternException(
1509 "Invalid interval {" + min + "," + max + "}");
1510
1511 if(__program!= null) {
1512 __program[offset + 2] = (char)min;
1513 __program[offset + 3] = (char)max;
1514 }
1515 }
1516
1517
1518 if(__input._getValue() == '?') {
1519 __getNextChar();
1520 __programInsertOperator(OpCode._MINMOD, offset);
1521 __programAddTail(offset, offset + 2);
1522 }
1523
1524 if(__isComplexRepetitionOp(__input._array, __input._getOffset()))
1525 throw new MalformedPatternException(
1526 "Nested repetitions *?+ in expression");
1527
1528 return offset;
1529 }
1530
1531
1532 private