Source code: gnu/regexp/RE.java
1 /* gnu/regexp/RE.java
2 Copyright (C) 1998-2001, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GNU Classpath.
5
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
37
38 package gnu.regexp;
39 import java.io.InputStream;
40 import java.io.Serializable;
41 import java.util.Locale;
42 import java.util.PropertyResourceBundle;
43 import java.util.ResourceBundle;
44 import java.util.Vector;
45
46 /**
47 * RE provides the user interface for compiling and matching regular
48 * expressions.
49 * <P>
50 * A regular expression object (class RE) is compiled by constructing it
51 * from a String, StringBuffer or character array, with optional
52 * compilation flags (below)
53 * and an optional syntax specification (see RESyntax; if not specified,
54 * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
55 * <P>
56 * Once compiled, a regular expression object is reusable as well as
57 * threadsafe: multiple threads can use the RE instance simultaneously
58 * to match against different input text.
59 * <P>
60 * Various methods attempt to match input text against a compiled
61 * regular expression. These methods are:
62 * <LI><code>isMatch</code>: returns true if the input text in its
63 * entirety matches the regular expression pattern.
64 * <LI><code>getMatch</code>: returns the first match found in the
65 * input text, or null if no match is found.
66 * <LI><code>getAllMatches</code>: returns an array of all
67 * non-overlapping matches found in the input text. If no matches are
68 * found, the array is zero-length.
69 * <LI><code>substitute</code>: substitute the first occurence of the
70 * pattern in the input text with a replacement string (which may
71 * include metacharacters $0-$9, see REMatch.substituteInto).
72 * <LI><code>substituteAll</code>: same as above, but repeat for each
73 * match before returning.
74 * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration
75 * object that allows iteration over the matches (see
76 * REMatchEnumeration for some reasons why you may want to do this
77 * instead of using <code>getAllMatches</code>.
78 * <P>
79 *
80 * These methods all have similar argument lists. The input can be a
81 * String, a character array, a StringBuffer, or an
82 * InputStream of some sort. Note that when using an
83 * InputStream, the stream read position cannot be guaranteed after
84 * attempting a match (this is not a bug, but a consequence of the way
85 * regular expressions work). Using an REMatchEnumeration can
86 * eliminate most positioning problems.
87 *
88 * <P>
89 *
90 * The optional index argument specifies the offset from the beginning
91 * of the text at which the search should start (see the descriptions
92 * of some of the execution flags for how this can affect positional
93 * pattern operators). For an InputStream, this means an
94 * offset from the current read position, so subsequent calls with the
95 * same index argument on an InputStream will not
96 * necessarily access the same position on the stream, whereas
97 * repeated searches at a given index in a fixed string will return
98 * consistent results.
99 *
100 * <P>
101 * You can optionally affect the execution environment by using a
102 * combination of execution flags (constants listed below).
103 *
104 * <P>
105 * All operations on a regular expression are performed in a
106 * thread-safe manner.
107 *
108 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
109 * @version 1.1.5-dev, to be released
110 */
111
112 public class RE extends REToken {
113
114 private static final class IntPair implements Serializable {
115 public int first, second;
116 }
117
118 private static final class CharUnit implements Serializable {
119 public char ch;
120 public boolean bk;
121 }
122
123 // This String will be returned by getVersion()
124 private static final String VERSION = "1.1.5-dev";
125
126 // The localized strings are kept in a separate file
127 private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
128
129 // These are, respectively, the first and last tokens in our linked list
130 // If there is only one token, firstToken == lastToken
131 private REToken firstToken, lastToken;
132
133 // This is the number of subexpressions in this regular expression,
134 // with a minimum value of zero. Returned by getNumSubs()
135 private int numSubs;
136
137 /** Minimum length, in characters, of any possible match. */
138 private int minimumLength;
139
140 /**
141 * Compilation flag. Do not differentiate case. Subsequent
142 * searches using this RE will be case insensitive.
143 */
144 public static final int REG_ICASE = 2;
145
146 /**
147 * Compilation flag. The match-any-character operator (dot)
148 * will match a newline character. When set this overrides the syntax
149 * bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to
150 * the "/s" operator in Perl.
151 */
152 public static final int REG_DOT_NEWLINE = 4;
153
154 /**
155 * Compilation flag. Use multiline mode. In this mode, the ^ and $
156 * anchors will match based on newlines within the input. This is
157 * equivalent to the "/m" operator in Perl.
158 */
159 public static final int REG_MULTILINE = 8;
160
161 /**
162 * Execution flag.
163 * The match-beginning operator (^) will not match at the beginning
164 * of the input string. Useful for matching on a substring when you
165 * know the context of the input is such that position zero of the
166 * input to the match test is not actually position zero of the text.
167 * <P>
168 * This example demonstrates the results of various ways of matching on
169 * a substring.
170 * <P>
171 * <CODE>
172 * String s = "food bar fool";<BR>
173 * RE exp = new RE("^foo.");<BR>
174 * REMatch m0 = exp.getMatch(s);<BR>
175 * REMatch m1 = exp.getMatch(s.substring(8));<BR>
176 * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
177 * REMatch m3 = exp.getMatch(s,8); <BR>
178 * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX); <BR>
179 * <P>
180 * // Results:<BR>
181 * // m0.toString(): "food"<BR>
182 * // m1.toString(): "fool"<BR>
183 * // m2.toString(): null<BR>
184 * // m3.toString(): null<BR>
185 * // m4.toString(): "fool"<BR>
186 * </CODE>
187 */
188 public static final int REG_NOTBOL = 16;
189
190 /**
191 * Execution flag.
192 * The match-end operator ($) does not match at the end
193 * of the input string. Useful for matching on substrings.
194 */
195 public static final int REG_NOTEOL = 32;
196
197 /**
198 * Execution flag.
199 * When a match method is invoked that starts matching at a non-zero
200 * index into the input, treat the input as if it begins at the index
201 * given. The effect of this flag is that the engine does not "see"
202 * any text in the input before the given index. This is useful so
203 * that the match-beginning operator (^) matches not at position 0
204 * in the input string, but at the position the search started at
205 * (based on the index input given to the getMatch function). See
206 * the example under REG_NOTBOL. It also affects the use of the \<
207 * and \b operators.
208 */
209 public static final int REG_ANCHORINDEX = 64;
210
211 /**
212 * Execution flag.
213 * The substitute and substituteAll methods will not attempt to
214 * interpolate occurrences of $1-$9 in the replacement text with
215 * the corresponding subexpressions. For example, you may want to
216 * replace all matches of "one dollar" with "$1".
217 */
218 public static final int REG_NO_INTERPOLATE = 128;
219
220 /** Returns a string representing the version of the gnu.regexp package. */
221 public static final String version() {
222 return VERSION;
223 }
224
225 // Retrieves a message from the ResourceBundle
226 static final String getLocalizedMessage(String key) {
227 return messages.getString(key);
228 }
229
230 /**
231 * Constructs a regular expression pattern buffer without any compilation
232 * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
233 *
234 * @param pattern A regular expression pattern, in the form of a String,
235 * StringBuffer or char[]. Other input types will be converted to
236 * strings using the toString() method.
237 * @exception REException The input pattern could not be parsed.
238 * @exception NullPointerException The pattern was null.
239 */
240 public RE(Object pattern) throws REException {
241 this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
242 }
243
244 /**
245 * Constructs a regular expression pattern buffer using the specified
246 * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
247 *
248 * @param pattern A regular expression pattern, in the form of a String,
249 * StringBuffer, or char[]. Other input types will be converted to
250 * strings using the toString() method.
251 * @param cflags The logical OR of any combination of the compilation flags listed above.
252 * @exception REException The input pattern could not be parsed.
253 * @exception NullPointerException The pattern was null.
254 */
255 public RE(Object pattern, int cflags) throws REException {
256 this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
257 }
258
259 /**
260 * Constructs a regular expression pattern buffer using the specified
261 * compilation flags and regular expression syntax.
262 *
263 * @param pattern A regular expression pattern, in the form of a String,
264 * StringBuffer, or char[]. Other input types will be converted to
265 * strings using the toString() method.
266 * @param cflags The logical OR of any combination of the compilation flags listed above.
267 * @param syntax The type of regular expression syntax to use.
268 * @exception REException The input pattern could not be parsed.
269 * @exception NullPointerException The pattern was null.
270 */
271 public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
272 this(pattern,cflags,syntax,0,0);
273 }
274
275 // internal constructor used for alternation
276 private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
277 super(subIndex);
278 firstToken = first;
279 lastToken = last;
280 numSubs = subs;
281 minimumLength = minLength;
282 addToken(new RETokenEndSub(subIndex));
283 }
284
285 private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
286 super(myIndex); // Subexpression index of this token.
287 initialize(patternObj, cflags, syntax, myIndex, nextSub);
288 }
289
290 // For use by subclasses
291 protected RE() { super(0); }
292
293 // The meat of construction
294 protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
295 char[] pattern;
296 if (patternObj instanceof String) {
297 pattern = ((String) patternObj).toCharArray();
298 } else if (patternObj instanceof char[]) {
299 pattern = (char[]) patternObj;
300 } else if (patternObj instanceof StringBuffer) {
301 pattern = new char [((StringBuffer) patternObj).length()];
302 ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
303 } else {
304 pattern = patternObj.toString().toCharArray();
305 }
306
307 int pLength = pattern.length;
308
309 numSubs = 0; // Number of subexpressions in this token.
310 Vector branches = null;
311
312 // linked list of tokens (sort of -- some closed loops can exist)
313 firstToken = lastToken = null;
314
315 // Precalculate these so we don't pay for the math every time we
316 // need to access them.
317 boolean insens = ((cflags & REG_ICASE) > 0);
318
319 // Parse pattern into tokens. Does anyone know if it's more efficient
320 // to use char[] than a String.charAt()? I'm assuming so.
321
322 // index tracks the position in the char array
323 int index = 0;
324
325 // this will be the current parse character (pattern[index])
326 CharUnit unit = new CharUnit();
327
328 // This is used for {x,y} calculations
329 IntPair minMax = new IntPair();
330
331 // Buffer a token so we can create a TokenRepeated, etc.
332 REToken currentToken = null;
333 char ch;
334 boolean quot = false;
335
336 while (index < pLength) {
337 // read the next character unit (including backslash escapes)
338 index = getCharUnit(pattern,index,unit,quot);
339
340 if (unit.bk)
341 if (unit.ch == 'Q') {
342 quot = true;
343 continue;
344 } else if (unit.ch == 'E') {
345 quot = false;
346 continue;
347 }
348 if (quot)
349 unit.bk = false;
350
351 // ALTERNATION OPERATOR
352 // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
353 // not available if RE_LIMITED_OPS is set
354
355 // TODO: the '\n' literal here should be a test against REToken.newline,
356 // which unfortunately may be more than a single character.
357 if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
358 || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
359 && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
360 // make everything up to here be a branch. create vector if nec.
361 addToken(currentToken);
362 RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
363 minimumLength = 0;
364 if (branches == null) {
365 branches = new Vector();
366 }
367 branches.addElement(theBranch);
368 firstToken = lastToken = currentToken = null;
369 }
370
371 // INTERVAL OPERATOR:
372 // {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES)
373 // \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
374 //
375 // OPEN QUESTION:
376 // what is proper interpretation of '{' at start of string?
377
378 else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
379 int newIndex = getMinMax(pattern,index,minMax,syntax);
380 if (newIndex > index) {
381 if (minMax.first > minMax.second)
382 throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
383 if (currentToken == null)
384 throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
385 if (currentToken instanceof RETokenRepeated)
386 throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
387 if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
388 throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
389 if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
390 throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
391 index = newIndex;
392 currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
393 }
394 else {
395 addToken(currentToken);
396 currentToken = new RETokenChar(subIndex,unit.ch,insens);
397 }
398 }
399
400 // LIST OPERATOR:
401 // [...] | [^...]
402
403 else if ((unit.ch == '[') && !(unit.bk || quot)) {
404 Vector options = new Vector();
405 boolean negative = false;
406 char lastChar = 0;
407 if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
408
409 // Check for initial caret, negation
410 if ((ch = pattern[index]) == '^') {
411 negative = true;
412 if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
413 ch = pattern[index];
414 }
415
416 // Check for leading right bracket literal
417 if (ch == ']') {
418 lastChar = ch;
419 if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
420 }
421
422 while ((ch = pattern[index++]) != ']') {
423 if ((ch == '-') && (lastChar != 0)) {
424 if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
425 if ((ch = pattern[index]) == ']') {
426 options.addElement(new RETokenChar(subIndex,lastChar,insens));
427 lastChar = '-';
428 } else {
429 options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
430 lastChar = 0;
431 index++;
432 }
433 } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
434 if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
435 int posixID = -1;
436 boolean negate = false;
437 char asciiEsc = 0;
438 if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
439 switch (pattern[index]) {
440 case 'D':
441 negate = true;
442 case 'd':
443 posixID = RETokenPOSIX.DIGIT;
444 break;
445 case 'S':
446 negate = true;
447 case 's':
448 posixID = RETokenPOSIX.SPACE;
449 break;
450 case 'W':
451 negate = true;
452 case 'w':
453 posixID = RETokenPOSIX.ALNUM;
454 break;
455 }
456 }
457 else if ("nrt".indexOf(pattern[index]) != -1) {
458 switch (pattern[index]) {
459 case 'n':
460 asciiEsc = '\n';
461 break;
462 case 't':
463 asciiEsc = '\t';
464 break;
465 case 'r':
466 asciiEsc = '\r';
467 break;
468 }
469 }
470 if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
471
472 if (posixID != -1) {
473 options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
474 } else if (asciiEsc != 0) {
475 lastChar = asciiEsc;
476 } else {
477 lastChar = pattern[index];
478 }
479 ++index;
480 } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
481 StringBuffer posixSet = new StringBuffer();
482 index = getPosixSet(pattern,index+1,posixSet);
483 int posixId = RETokenPOSIX.intValue(posixSet.toString());
484 if (posixId != -1)
485 options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
486 } else {
487 if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
488 lastChar = ch;
489 }
490 if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
491 } // while in list
492 // Out of list, index is one past ']'
493
494 if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
495
496 // Create a new RETokenOneOf
497 addToken(currentToken);
498 options.trimToSize();
499 currentToken = new RETokenOneOf(subIndex,options,negative);
500 }
501
502 // SUBEXPRESSIONS
503 // (...) | \(...\) depending on RE_NO_BK_PARENS
504
505 else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
506 boolean pure = false;
507 boolean comment = false;
508 boolean lookAhead = false;
509 boolean negativelh = false;
510 if ((index+1 < pLength) && (pattern[index] == '?')) {
511 switch (pattern[index+1]) {
512 case '!':
513 if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
514 pure = true;
515 negativelh = true;
516 lookAhead = true;
517 index += 2;
518 }
519 break;
520 case '=':
521 if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
522 pure = true;
523 lookAhead = true;
524 index += 2;
525 }
526 break;
527 case ':':
528 if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
529 pure = true;
530 index += 2;
531 }
532 break;
533 case '#':
534 if (syntax.get(RESyntax.RE_COMMENTS)) {
535 comment = true;
536 }
537 break;
538 default:
539 throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
540 }
541 }
542
543 if (index >= pLength) {
544 throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
545 }
546
547 // find end of subexpression
548 int endIndex = index;
549 int nextIndex = index;
550 int nested = 0;
551
552 while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
553 && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
554 if ((endIndex = nextIndex) >= pLength)
555 throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
556 else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
557 nested++;
558 else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
559 nested--;
560
561 // endIndex is now position at a ')','\)'
562 // nextIndex is end of string or position after ')' or '\)'
563
564 if (comment) index = nextIndex;
565 else { // not a comment
566 // create RE subexpression as token.
567 addToken(currentToken);
568 if (!pure) {
569 numSubs++;
570 }
571
572 int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
573 currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
574 numSubs += ((RE) currentToken).getNumSubs();
575
576 if (lookAhead) {
577 currentToken = new RETokenLookAhead(currentToken,negativelh);
578 }
579
580 index = nextIndex;
581 } // not a comment
582 } // subexpression
583
584 // UNMATCHED RIGHT PAREN
585 // ) or \) throw exception if
586 // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
587 else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
588 throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
589 }
590
591 // START OF LINE OPERATOR
592 // ^
593
594 else if ((unit.ch == '^') && !(unit.bk || quot)) {
595 addToken(currentToken);
596 currentToken = null;
597 addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
598 }
599
600 // END OF LINE OPERATOR
601 // $
602
603 else if ((unit.ch == '$') && !(unit.bk || quot)) {
604 addToken(currentToken);
605 currentToken = null;
606 addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
607 }
608
609 // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
610 // .
611
612 else if ((unit.ch == '.') && !(unit.bk || quot)) {
613 addToken(currentToken);
614 currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
615 }
616
617 // ZERO-OR-MORE REPEAT OPERATOR
618 // *
619
620 else if ((unit.ch == '*') && !(unit.bk || quot)) {
621 if (currentToken == null)
622 throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
623 if (currentToken instanceof RETokenRepeated)
624 throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
625 if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
626 throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
627 if (currentToken.getMinimumLength() == 0)
628 throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
629 currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
630 }
631
632 // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR
633 // + | \+ depending on RE_BK_PLUS_QM
634 // not available if RE_LIMITED_OPS is set
635
636 else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
637 if (currentToken == null)
638 throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
639
640 // Check for possessive matching on RETokenRepeated
641 if (currentToken instanceof RETokenRepeated) {
642 RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
643 if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
644 tokenRep.makePossessive();
645 else
646 throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
647
648 }
649 else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
650 throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
651 else if (currentToken.getMinimumLength() == 0)
652 throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
653 else
654 currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
655 }
656
657 // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
658 // ? | \? depending on RE_BK_PLUS_QM
659 // not available if RE_LIMITED_OPS is set
660 // stingy matching if RE_STINGY_OPS is set and it follows a quantifier
661
662 else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
663 if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
664
665 // Check for stingy matching on RETokenRepeated
666 if (currentToken instanceof RETokenRepeated) {
667 RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
668 if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
669 tokenRep.makeStingy();
670 else
671 throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
672 }
673 else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
674 throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
675 else
676 currentToken = setRepeated(currentToken,0,1,index);
677 }
678
679 // BACKREFERENCE OPERATOR
680 // \1 \2 ... \9
681 // not available if RE_NO_BK_REFS is set
682
683 else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
684 addToken(currentToken);
685 currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
686 }
687
688 // START OF STRING OPERATOR
689 // \A if RE_STRING_ANCHORS is set
690
691 else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
692 addToken(currentToken);
693 currentToken = new RETokenStart(subIndex,null);
694 }
695
696 // WORD BREAK OPERATOR
697 // \b if ????
698
699 else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
700 addToken(currentToken);
701 currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
702 }
703
704 // WORD BEGIN OPERATOR
705 // \< if ????
706 else if (unit.bk && (unit.ch == '<')) {
707 addToken(currentToken);
708 currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
709 }
710
711 // WORD END OPERATOR
712 // \> if ????
713 else if (unit.bk && (unit.ch == '>')) {
714 addToken(currentToken);
715 currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
716 }
717
718 // NON-WORD BREAK OPERATOR
719 // \B if ????
720
721 else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
722 addToken(currentToken);
723 currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
724 }
725
726
727 // DIGIT OPERATOR
728 // \d if RE_CHAR_CLASS_ESCAPES is set
729
730 else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
731 addToken(currentToken);
732 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
733 }
734
735 // NON-DIGIT OPERATOR
736 // \D
737
738 else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
739 addToken(currentToken);
740 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
741 }
742
743 // NEWLINE ESCAPE
744 // \n
745
746 else if (unit.bk && (unit.ch == 'n')) {
747 addToken(currentToken);
748 currentToken = new RETokenChar(subIndex,'\n',false);
749 }
750
751 // RETURN ESCAPE
752 // \r
753
754 else if (unit.bk && (unit.ch == 'r')) {
755 addToken(currentToken);
756 currentToken = new RETokenChar(subIndex,'\r',false);
757 }
758
759 // WHITESPACE OPERATOR
760 // \s if RE_CHAR_CLASS_ESCAPES is set
761
762 else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
763 addToken(currentToken);
764 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
765 }
766
767 // NON-WHITESPACE OPERATOR
768 // \S
769
770 else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
771 addToken(currentToken);
772 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
773 }
774
775 // TAB ESCAPE
776 // \t
777
778 else if (unit.bk && (unit.ch == 't')) {
779 addToken(currentToken);
780 currentToken = new RETokenChar(subIndex,'\t',false);
781 }
782
783 // ALPHANUMERIC OPERATOR
784 // \w
785
786 else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
787 addToken(currentToken);
788 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
789 }
790
791 // NON-ALPHANUMERIC OPERATOR
792 // \W
793
794 else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
795 addToken(currentToken);
796 currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
797 }
798
799 // END OF STRING OPERATOR
800 // \Z
801
802 else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
803 addToken(currentToken);
804 currentToken = new RETokenEnd(subIndex,null);
805 }
806
807 // NON-SPECIAL CHARACTER (or escape to make literal)
808 // c | \* for example
809
810 else { // not a special character
811 addToken(currentToken);
812 currentToken = new RETokenChar(subIndex,unit.ch,insens);
813 }
814 } // end while
815
816 // Add final buffered token and an EndSub marker
817 addToken(currentToken);
818
819 if (branches != null) {
820 branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
821 branches.trimToSize(); // compact the Vector
822 minimumLength = 0;
823 firstToken = lastToken = null;
824 addToken(new RETokenOneOf(subIndex,branches,false));
825 }
826 else addToken(new RETokenEndSub(subIndex));
827
828 }
829
830 private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
831 unit.ch = input[index++];
832 unit.bk = (unit.ch == '\\'
833 && (!quot || index >= input.length || input[index] == 'E'));
834 if (unit.bk)
835 if (index < input.length)
836 unit.ch = input[index++];
837 else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
838 return index;
839 }
840
841 /**
842 * Checks if the regular expression matches the input in its entirety.
843 *
844 * @param input The input text.
845 */
846 public boolean isMatch(Object input) {
847 return isMatch(input,0,0);
848 }
849
850 /**
851 * Checks if the input string, starting from index, is an exact match of
852 * this regular expression.
853 *
854 * @param input The input text.
855 * @param index The offset index at which the search should be begin.
856 */
857 public boolean isMatch(Object input,int index) {
858 return isMatch(input,index,0);
859 }
860
861
862 /**
863 * Checks if the input, starting from index and using the specified
864 * execution flags, is an exact match of this regular expression.
865 *
866 * @param input The input text.
867 * @param index The offset index at which the search should be begin.
868 * @param eflags The logical OR of any execution flags above.
869 */
870 public boolean isMatch(Object input,int index,int eflags) {
871 return isMatchImpl(makeCharIndexed(input,index),index,eflags);
872 }
873
874 private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
875 if (firstToken == null) // Trivial case
876 return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
877 REMatch m = new REMatch(numSubs, index, eflags);
878 if (firstToken.match(input, m)) {
879 while (m != null) {
880 if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
881 return true;
882 }
883 m = m.next;
884 }
885 }
886 return false;
887 }
888
889 /**
890 * Returns the maximum number of subexpressions in this regular expression.
891 * If the expression contains branches, the value returned will be the
892 * maximum subexpressions in any of the branches.
893 */
894 public int getNumSubs() {
895 return numSubs;
896 }
897
898 // Overrides REToken.setUncle
899 void setUncle(REToken uncle) {
900 if (lastToken != null) {
901 lastToken.setUncle(uncle);
902 } else super.setUncle(uncle); // to deal with empty subexpressions
903 }
904
905 // Overrides REToken.chain
906
907 boolean chain(REToken next) {
908 super.chain(next);
909 setUncle(next);
910 return true;
911 }
912
913 /**
914 * Returns the minimum number of characters that could possibly
915 * constitute a match of this regular expression.
916 */
917 public int getMinimumLength() {
918 return minimumLength;
919 }
920
921 /**
922 * Returns an array of all matches found in the input.
923 *
924 * If the regular expression allows the empty string to match, it will
925 * substitute matches at all positions except the end of the input.
926 *
927 * @param input The input text.
928 * @return a non-null (but possibly zero-length) array of matches
929 */
930 public REMatch[] getAllMatches(Object input) {
931 return getAllMatches(input,0,0);
932 }
933
934 /**
935 * Returns an array of all matches found in the input,
936 * beginning at the specified index position.
937 *
938 * If the regular expression allows the empty string to match, it will
939 * substitute matches at all positions except the end of the input.
940 *
941 * @param input The input text.
942 * @param index The offset index at which the search should be begin.
943 * @return a non-null (but possibly zero-length) array of matches
944 */
945 public REMatch[] getAllMatches(Object input, int index) {
946 return getAllMatches(input,index,0);
947 }
948
949 /**
950 * Returns an array of all matches found in the input string,
951 * beginning at the specified index position and using the specified
952 * execution flags.
953 *
954 * If the regular expression allows the empty string to match, it will
955 * substitute matches at all positions except the end of the input.
956 *
957 * @param input The input text.
958 * @param index The offset index at which the search should be begin.
959 * @param eflags The logical OR of any execution flags above.
960 * @return a non-null (but possibly zero-length) array of matches
961 */
962 public REMatch[] getAllMatches(Object input, int index, int eflags) {
963 return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
964 }
965
966 // this has been changed since 1.03 to be non-overlapping matches
967 private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
968 Vector all = new Vector();
969 REMatch m = null;
970 while ((m = getMatchImpl(input,index,eflags,null)) != null) {
971 all.addElement(m);
972 index = m.getEndIndex();
973 if (m.end[0] == 0) { // handle pathological case of zero-length match
974 index++;
975 input.move(1);
976 } else {
977 input.move(m.end[0]);
978 }
979 if (!input.isValid()) break;
980 }
981 REMatch[] mset = new REMatch[all.size()];
982 all.copyInto(mset);
983 return mset;
984 }
985
986 /* Implements abstract method REToken.match() */
987 boolean match(CharIndexed input, REMatch mymatch) {
988 if (firstToken == null) return next(input, mymatch);
989
990 // Note the start of this subexpression
991 mymatch.start[subIndex] = mymatch.index;
992
993 return firstToken.match(input, mymatch);
994 }
995
996 /**
997 * Returns the first match found in the input. If no match is found,
998 * null is returned.
999 *
1000 * @param input The input text.
1001 * @return An REMatch instance referencing the match, or null if none.
1002 */
1003 public REMatch getMatch(Object input) {
1004 return getMatch(input,0,0);
1005 }
1006
1007 /**
1008 * Returns the first match found in the input, beginning
1009 * the search at the specified index. If no match is found,
1010 * returns null.
1011 *
1012 * @param input The input text.
1013 * @param index The offset within the text to begin looking for a match.
1014 * @return An REMatch instance referencing the match, or null if none.
1015 */
1016 public REMatch getMatch(Object input, int index) {
1017 return getMatch(input,index,0);
1018 }
1019
1020 /**
1021 * Returns the first match found in the input, beginning
1022 * the search at the specified index, and using the specified
1023 * execution flags. If no match is found, returns null.
1024 *
1025 * @param input The input text.
1026 * @param index The offset index at which the search should be begin.
1027 * @param eflags The logical OR of any execution flags above.
1028 * @return An REMatch instance referencing the match, or null if none.
1029 */
1030 public REMatch getMatch(Object input, int index, int eflags) {
1031 return getMatch(input,index,eflags,null);
1032 }
1033
1034 /**
1035 * Returns the first match found in the input, beginning the search
1036 * at the specified index, and using the specified execution flags.
1037 * If no match is found, returns null. If a StringBuffer is
1038 * provided and is non-null, the contents of the input text from the
1039 * index to the beginning of the match (or to the end of the input,
1040 * if there is no match) are appended to the StringBuffer.
1041 *
1042 * @param input The input text.
1043 * @param index The offset index at which the search should be begin.
1044 * @param eflags The logical OR of any execution flags above.
1045 * @param buffer The StringBuffer to save pre-match text in.
1046 * @return An REMatch instance referencing the match, or null if none. */
1047 public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1048 return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1049 }
1050
1051 REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1052 // Create a new REMatch to hold results
1053 REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1054 do {
1055 // Optimization: check if anchor + minimumLength > length
1056 if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1057 if (match(input, mymatch)) {
1058 // Find longest match of them all to observe leftmost longest
1059 REMatch longest = mymatch;
1060 while ((mymatch = mymatch.next) != null) {
1061 if (mymatch.index > longest.index) {
1062 longest = mymatch;
1063 }
1064 }
1065
1066 longest.end[0] = longest.index;
1067 longest.finish(input);
1068 return longest;
1069 }
1070 }
1071 mymatch.clear(++anchor);
1072 // Append character to buffer if needed
1073 if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1074 buffer.append(input.charAt(0));
1075 }
1076 } while (input.move(1));
1077
1078 // Special handling at end of input for e.g. "$"
1079 if (minimumLength == 0) {
1080 if (match(input, mymatch)) {
1081 mymatch.finish(input);
1082 return mymatch;
1083 }
1084 }
1085
1086 return null;
1087 }
1088
1089 /**
1090 * Returns an REMatchEnumeration that can be used to iterate over the
1091 * matches found in the input text.
1092 *
1093 * @param input The input text.
1094 * @return A non-null REMatchEnumeration instance.
1095 */
1096 public REMatchEnumeration getMatchEnumeration(Object input) {
1097 return getMatchEnumeration(input,0,0);
1098 }
1099
1100
1101 /**
1102 * Returns an REMatchEnumeration that can be used to iterate over the
1103 * matches found in the input text.
1104 *
1105 * @param input The input text.
1106 * @param index The offset index at which the search should be begin.
1107 * @return A non-null REMatchEnumeration instance, with its input cursor
1108 * set to the index position specified.
1109 */
1110 public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1111 return getMatchEnumeration(input,index,0);
1112 }
1113
1114 /**
1115 * Returns an REMatchEnumeration that can be used to iterate over the
1116 * matches found in the input text.
1117 *
1118 * @param input The input text.
1119 * @param index The offset index at which the search should be begin.
1120 * @param eflags The logical OR of any execution flags above.
1121 * @return A non-null REMatchEnumeration instance, with its input cursor
1122 * set to the index position specified.
1123 */
1124 public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1125 return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1126 }
1127
1128
1129 /**
1130 * Substitutes the replacement text for the first match found in the input.
1131 *
1132 * @param input The input text.
1133 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1134 * @return A String interpolating the substituted text.
1135 * @see REMatch#substituteInto
1136 */
1137 public String substitute(Object input,String replace) {
1138 return substitute(input,replace,0,0);
1139 }
1140
1141 /**
1142 * Substitutes the replacement text for the first match found in the input
1143 * beginning at the specified index position. Specifying an index
1144 * effectively causes the regular expression engine to throw away the
1145 * specified number of characters.
1146 *
1147 * @param input The input text.
1148 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1149 * @param index The offset index at which the search should be begin.
1150 * @return A String containing the substring of the input, starting
1151 * at the index position, and interpolating the substituted text.
1152 * @see REMatch#substituteInto
1153 */
1154 public String substitute(Object input,String replace,int index) {
1155 return substitute(input,replace,index,0);
1156 }
1157
1158 /**
1159 * Substitutes the replacement text for the first match found in the input
1160 * string, beginning at the specified index position and using the
1161 * specified execution flags.
1162 *
1163 * @param input The input text.
1164 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1165 * @param index The offset index at which the search should be begin.
1166 * @param eflags The logical OR of any execution flags above.
1167 * @return A String containing the substring of the input, starting
1168 * at the index position, and interpolating the substituted text.
1169 * @see REMatch#substituteInto
1170 */
1171 public String substitute(Object input,String replace,int index,int eflags) {
1172 return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1173 }
1174
1175 private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1176 StringBuffer buffer = new StringBuffer();
1177 REMatch m = getMatchImpl(input,index,eflags,buffer);
1178 if (m==null) return buffer.toString();
1179 buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1180 replace : m.substituteInto(replace) );
1181 if (input.move(m.end[0])) {
1182 do {
1183 buffer.append(input.charAt(0));
1184 } while (input.move(1));
1185 }
1186 return buffer.toString();
1187 }
1188
1189 /**
1190 * Substitutes the replacement text for each non-overlapping match found
1191 * in the input text.
1192 *
1193 * @param input The input text.
1194 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1195 * @return A String interpolating the substituted text.
1196 * @see REMatch#substituteInto
1197 */
1198 public String substituteAll(Object input,String replace) {
1199 return substituteAll(input,replace,0,0);
1200 }
1201
1202 /**
1203 * Substitutes the replacement text for each non-overlapping match found
1204 * in the input text, starting at the specified index.
1205 *
1206 * If the regular expression allows the empty string to match, it will
1207 * substitute matches at all positions except the end of the input.
1208 *
1209 * @param input The input text.
1210 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1211 * @param index The offset index at which the search should be begin.
1212 * @return A String containing the substring of the input, starting
1213 * at the index position, and interpolating the substituted text.
1214 * @see REMatch#substituteInto
1215 */
1216 public String substituteAll(Object input,String replace,int index) {
1217 return substituteAll(input,replace,index,0);
1218 }
1219
1220 /**
1221 * Substitutes the replacement text for each non-overlapping match found
1222 * in the input text, starting at the specified index and using the
1223 * specified execution flags.
1224 *
1225 * @param input The input text.
1226 * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1227 * @param index The offset index at which the search should be begin.
1228 * @param eflags The logical OR of any execution flags above.
1229 * @return A String containing the substring of the input, starting
1230 * at the index position, and interpolating the substituted text.
1231 * @see REMatch#substituteInto
1232 */
1233 public String substituteAll(Object input,String replace,int index,int eflags) {
1234 return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1235 }
1236
1237 private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1238 StringBuffer buffer = new StringBuffer();
1239 REMatch m;
1240 while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1241 buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1242 replace : m.substituteInto(replace) );
1243 index = m.getEndIndex();
1244 if (m.end[0] == 0) {
1245 char ch = input.charAt(0);
1246 if (ch != CharIndexed.OUT_OF_BOUNDS)
1247 buffer.append(ch);
1248 input.move(1);
1249 } else {
1250 input.move(m.end[0]);
1251 }
1252
1253 if (!input.isValid()) break;
1254 }
1255 return buffer.toString();
1256 }
1257
1258 /* Helper function for constructor */
1259 private void addToken(REToken next) {
1260 if (next == null) return;
1261 minimumLength += next.getMinimumLength();
1262 if (firstToken == null) {
1263 lastToken = firstToken = next;
1264 } else {
1265 // if chain returns false, it "rejected" the token due to
1266 // an optimization, and next was combined with lastToken
1267 if (lastToken.chain(next)) {
1268 lastToken = next;
1269 }
1270 }
1271 }
1272
1273 private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1274 if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1275 return new RETokenRepeated(current.subIndex,current,min,max);
1276 }
1277
1278 private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1279 // Precondition: pattern[index-1] == ':'
1280 // we will return pos of closing ']'.
1281 int i;
1282 for (i=index; i<(pattern.length-1); i++) {
1283 if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1284 return i+2;
12