Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: gnu/regexp/RE.java


1   /* gnu/regexp/RE.java
2      Copyright (C) 1998-2001, 2004, 2005 Free Software Foundation, Inc.
3   
4   This file is part of GNU Classpath.
5   
6   GNU Classpath is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10  
11  GNU Classpath is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  General Public License for more details.
15  
16  You should have received a copy of the GNU General Public License
17  along with GNU Classpath; see the file COPYING.  If not, write to the
18  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  02110-1301 USA.
20  
21  Linking this library statically or dynamically with other modules is
22  making a combined work based on this library.  Thus, the terms and
23  conditions of the GNU General Public License cover the whole
24  combination.
25  
26  As a special exception, the copyright holders of this library give you
27  permission to link this library with independent modules to produce an
28  executable, regardless of the license terms of these independent
29  modules, and to copy and distribute the resulting executable under
30  terms of your choice, provided that you also meet, for each linked
31  independent module, the terms and conditions of the license of that
32  module.  An independent module is a module which is not derived from
33  or based on this library.  If you modify this library, you may extend
34  this exception to your version of the library, but you are not
35  obligated to do so.  If you do not wish to do so, delete this
36  exception statement from your version. */
37  
38  package gnu.regexp;
39  import java.io.InputStream;
40  import java.io.Serializable;
41  import java.util.Locale;
42  import java.util.PropertyResourceBundle;
43  import java.util.ResourceBundle;
44  import java.util.Vector;
45  
46  /**
47   * RE provides the user interface for compiling and matching regular
48   * expressions.
49   * <P>
50   * A regular expression object (class RE) is compiled by constructing it
51   * from a String, StringBuffer or character array, with optional 
52   * compilation flags (below)
53   * and an optional syntax specification (see RESyntax; if not specified,
54   * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
55   * <P>
56   * Once compiled, a regular expression object is reusable as well as
57   * threadsafe: multiple threads can use the RE instance simultaneously
58   * to match against different input text.
59   * <P>
60   * Various methods attempt to match input text against a compiled
61   * regular expression.  These methods are:
62   * <LI><code>isMatch</code>: returns true if the input text in its
63   * entirety matches the regular expression pattern.
64   * <LI><code>getMatch</code>: returns the first match found in the
65   * input text, or null if no match is found.
66   * <LI><code>getAllMatches</code>: returns an array of all
67   * non-overlapping matches found in the input text.  If no matches are
68   * found, the array is zero-length.
69   * <LI><code>substitute</code>: substitute the first occurence of the
70   * pattern in the input text with a replacement string (which may
71   * include metacharacters $0-$9, see REMatch.substituteInto).
72   * <LI><code>substituteAll</code>: same as above, but repeat for each
73   * match before returning.
74   * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration
75   * object that allows iteration over the matches (see
76   * REMatchEnumeration for some reasons why you may want to do this
77   * instead of using <code>getAllMatches</code>.
78   * <P>
79   *
80   * These methods all have similar argument lists.  The input can be a
81   * String, a character array, a StringBuffer, or an
82   * InputStream of some sort.  Note that when using an
83   * InputStream, the stream read position cannot be guaranteed after
84   * attempting a match (this is not a bug, but a consequence of the way
85   * regular expressions work).  Using an REMatchEnumeration can
86   * eliminate most positioning problems.
87   *
88   * <P>
89   *
90   * The optional index argument specifies the offset from the beginning
91   * of the text at which the search should start (see the descriptions
92   * of some of the execution flags for how this can affect positional
93   * pattern operators).  For an InputStream, this means an
94   * offset from the current read position, so subsequent calls with the
95   * same index argument on an InputStream will not
96   * necessarily access the same position on the stream, whereas
97   * repeated searches at a given index in a fixed string will return
98   * consistent results.
99   *
100  * <P>
101  * You can optionally affect the execution environment by using a
102  * combination of execution flags (constants listed below).
103  * 
104  * <P>
105  * All operations on a regular expression are performed in a
106  * thread-safe manner.
107  *
108  * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
109  * @version 1.1.5-dev, to be released
110  */
111 
112 public class RE extends REToken {
113 
114   private static final class IntPair implements Serializable {
115     public int first, second;
116   }
117 
118   private static final class CharUnit implements Serializable {
119     public char ch;
120     public boolean bk;
121   }
122 
123   // This String will be returned by getVersion()
124   private static final String VERSION = "1.1.5-dev";
125 
126   // The localized strings are kept in a separate file
127   private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
128 
129   // These are, respectively, the first and last tokens in our linked list
130   // If there is only one token, firstToken == lastToken
131   private REToken firstToken, lastToken;
132 
133   // This is the number of subexpressions in this regular expression,
134   // with a minimum value of zero.  Returned by getNumSubs()
135   private int numSubs;
136 
137     /** Minimum length, in characters, of any possible match. */
138     private int minimumLength;
139 
140   /**
141    * Compilation flag. Do  not  differentiate  case.   Subsequent
142    * searches  using  this  RE will be case insensitive.
143    */
144   public static final int REG_ICASE = 2;
145 
146   /**
147    * Compilation flag. The match-any-character operator (dot)
148    * will match a newline character.  When set this overrides the syntax
149    * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
150    * the "/s" operator in Perl.
151    */
152   public static final int REG_DOT_NEWLINE = 4;
153 
154   /**
155    * Compilation flag. Use multiline mode.  In this mode, the ^ and $
156    * anchors will match based on newlines within the input. This is
157    * equivalent to the "/m" operator in Perl.
158    */
159   public static final int REG_MULTILINE = 8;
160 
161   /**
162    * Execution flag.
163    * The match-beginning operator (^) will not match at the beginning
164    * of the input string. Useful for matching on a substring when you
165    * know the context of the input is such that position zero of the
166    * input to the match test is not actually position zero of the text.
167    * <P>
168    * This example demonstrates the results of various ways of matching on
169    * a substring.
170    * <P>
171    * <CODE>
172    * String s = "food bar fool";<BR>
173    * RE exp = new RE("^foo.");<BR>
174    * REMatch m0 = exp.getMatch(s);<BR>
175    * REMatch m1 = exp.getMatch(s.substring(8));<BR>
176    * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
177    * REMatch m3 = exp.getMatch(s,8);                            <BR>
178    * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX);         <BR>
179    * <P>
180    * // Results:<BR>
181    * //  m0.toString(): "food"<BR>
182    * //  m1.toString(): "fool"<BR>
183    * //  m2.toString(): null<BR>
184    * //  m3.toString(): null<BR>
185    * //  m4.toString(): "fool"<BR>
186    * </CODE>
187    */
188   public static final int REG_NOTBOL = 16;
189 
190   /**
191    * Execution flag.
192    * The match-end operator ($) does not match at the end
193    * of the input string. Useful for matching on substrings.
194    */
195   public static final int REG_NOTEOL = 32;
196 
197   /**
198    * Execution flag.
199    * When a match method is invoked that starts matching at a non-zero
200    * index into the input, treat the input as if it begins at the index
201    * given.  The effect of this flag is that the engine does not "see"
202    * any text in the input before the given index.  This is useful so
203    * that the match-beginning operator (^) matches not at position 0
204    * in the input string, but at the position the search started at
205    * (based on the index input given to the getMatch function).  See
206    * the example under REG_NOTBOL.  It also affects the use of the \&lt;
207    * and \b operators.
208    */
209   public static final int REG_ANCHORINDEX = 64;
210 
211   /**
212    * Execution flag.
213    * The substitute and substituteAll methods will not attempt to
214    * interpolate occurrences of $1-$9 in the replacement text with
215    * the corresponding subexpressions.  For example, you may want to
216    * replace all matches of "one dollar" with "$1".
217    */
218   public static final int REG_NO_INTERPOLATE = 128;
219 
220   /** Returns a string representing the version of the gnu.regexp package. */
221   public static final String version() {
222     return VERSION;
223   }
224 
225   // Retrieves a message from the ResourceBundle
226   static final String getLocalizedMessage(String key) {
227     return messages.getString(key);
228   }
229 
230   /**
231    * Constructs a regular expression pattern buffer without any compilation
232    * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
233    *
234    * @param pattern A regular expression pattern, in the form of a String,
235    *   StringBuffer or char[].  Other input types will be converted to
236    *   strings using the toString() method.
237    * @exception REException The input pattern could not be parsed.
238    * @exception NullPointerException The pattern was null.
239    */
240   public RE(Object pattern) throws REException {
241     this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
242   }
243 
244   /**
245    * Constructs a regular expression pattern buffer using the specified
246    * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
247    *
248    * @param pattern A regular expression pattern, in the form of a String,
249    *   StringBuffer, or char[].  Other input types will be converted to
250    *   strings using the toString() method.
251    * @param cflags The logical OR of any combination of the compilation flags listed above.
252    * @exception REException The input pattern could not be parsed.
253    * @exception NullPointerException The pattern was null.
254    */
255   public RE(Object pattern, int cflags) throws REException {
256     this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
257   }
258 
259   /**
260    * Constructs a regular expression pattern buffer using the specified
261    * compilation flags and regular expression syntax.
262    *
263    * @param pattern A regular expression pattern, in the form of a String,
264    *   StringBuffer, or char[].  Other input types will be converted to
265    *   strings using the toString() method.
266    * @param cflags The logical OR of any combination of the compilation flags listed above.
267    * @param syntax The type of regular expression syntax to use.
268    * @exception REException The input pattern could not be parsed.
269    * @exception NullPointerException The pattern was null.
270    */
271   public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
272     this(pattern,cflags,syntax,0,0);
273   }
274 
275   // internal constructor used for alternation
276   private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
277     super(subIndex);
278     firstToken = first;
279     lastToken = last;
280     numSubs = subs;
281     minimumLength = minLength;
282     addToken(new RETokenEndSub(subIndex));
283   }
284 
285   private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
286     super(myIndex); // Subexpression index of this token.
287     initialize(patternObj, cflags, syntax, myIndex, nextSub);
288   }
289 
290     // For use by subclasses
291     protected RE() { super(0); }
292 
293     // The meat of construction
294   protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
295       char[] pattern;
296     if (patternObj instanceof String) {
297       pattern = ((String) patternObj).toCharArray();
298     } else if (patternObj instanceof char[]) {
299       pattern = (char[]) patternObj;
300     } else if (patternObj instanceof StringBuffer) {
301       pattern = new char [((StringBuffer) patternObj).length()];
302       ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
303     } else {
304   pattern = patternObj.toString().toCharArray();
305     }
306 
307     int pLength = pattern.length;
308 
309     numSubs = 0; // Number of subexpressions in this token.
310     Vector branches = null;
311 
312     // linked list of tokens (sort of -- some closed loops can exist)
313     firstToken = lastToken = null;
314 
315     // Precalculate these so we don't pay for the math every time we
316     // need to access them.
317     boolean insens = ((cflags & REG_ICASE) > 0);
318 
319     // Parse pattern into tokens.  Does anyone know if it's more efficient
320     // to use char[] than a String.charAt()?  I'm assuming so.
321 
322     // index tracks the position in the char array
323     int index = 0;
324 
325     // this will be the current parse character (pattern[index])
326     CharUnit unit = new CharUnit();
327 
328     // This is used for {x,y} calculations
329     IntPair minMax = new IntPair();
330 
331     // Buffer a token so we can create a TokenRepeated, etc.
332     REToken currentToken = null;
333     char ch;
334     boolean quot = false;
335 
336     while (index < pLength) {
337       // read the next character unit (including backslash escapes)
338       index = getCharUnit(pattern,index,unit,quot);
339 
340       if (unit.bk)
341         if (unit.ch == 'Q') {
342           quot = true;
343           continue;
344         } else if (unit.ch == 'E') {
345           quot = false;
346           continue;
347         }
348       if (quot)
349         unit.bk = false;
350 
351       // ALTERNATION OPERATOR
352       //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
353       //  not available if RE_LIMITED_OPS is set
354 
355       // TODO: the '\n' literal here should be a test against REToken.newline,
356       // which unfortunately may be more than a single character.
357       if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
358        || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
359      && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
360   // make everything up to here be a branch. create vector if nec.
361   addToken(currentToken);
362   RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
363   minimumLength = 0;
364   if (branches == null) {
365       branches = new Vector();
366   }
367   branches.addElement(theBranch);
368   firstToken = lastToken = currentToken = null;
369       }
370       
371       // INTERVAL OPERATOR:
372       //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)
373       //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
374       //
375       // OPEN QUESTION: 
376       //  what is proper interpretation of '{' at start of string?
377 
378       else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
379   int newIndex = getMinMax(pattern,index,minMax,syntax);
380         if (newIndex > index) {
381           if (minMax.first > minMax.second)
382             throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
383           if (currentToken == null)
384             throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
385           if (currentToken instanceof RETokenRepeated) 
386             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
387           if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
388             throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
389           if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
390             throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
391           index = newIndex;
392           currentToken = setRepeated(currentToken,minMax.first,minMax.second,index); 
393         }
394         else {
395           addToken(currentToken);
396           currentToken = new RETokenChar(subIndex,unit.ch,insens);
397         } 
398       }
399       
400       // LIST OPERATOR:
401       //  [...] | [^...]
402 
403       else if ((unit.ch == '[') && !(unit.bk || quot)) {
404   Vector options = new Vector();
405   boolean negative = false;
406   char lastChar = 0;
407   if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
408   
409   // Check for initial caret, negation
410   if ((ch = pattern[index]) == '^') {
411     negative = true;
412     if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
413     ch = pattern[index];
414   }
415 
416   // Check for leading right bracket literal
417   if (ch == ']') {
418     lastChar = ch;
419     if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
420   }
421 
422   while ((ch = pattern[index++]) != ']') {
423     if ((ch == '-') && (lastChar != 0)) {
424       if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
425       if ((ch = pattern[index]) == ']') {
426         options.addElement(new RETokenChar(subIndex,lastChar,insens));
427         lastChar = '-';
428       } else {
429         options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
430         lastChar = 0;
431         index++;
432       }
433           } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
434             if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
435       int posixID = -1;
436       boolean negate = false;
437             char asciiEsc = 0;
438       if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
439         switch (pattern[index]) {
440         case 'D':
441     negate = true;
442         case 'd':
443     posixID = RETokenPOSIX.DIGIT;
444     break;
445         case 'S':
446     negate = true;
447         case 's':
448     posixID = RETokenPOSIX.SPACE;
449     break;
450         case 'W':
451     negate = true;
452         case 'w':
453     posixID = RETokenPOSIX.ALNUM;
454     break;
455         }
456       }
457             else if ("nrt".indexOf(pattern[index]) != -1) {
458               switch (pattern[index]) {
459                 case 'n':
460                   asciiEsc = '\n';
461                   break;
462                 case 't':
463                   asciiEsc = '\t';
464                   break;
465                 case 'r':
466                   asciiEsc = '\r';
467                   break;
468               }
469             }
470       if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
471       
472       if (posixID != -1) {
473         options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
474       } else if (asciiEsc != 0) {
475         lastChar = asciiEsc;
476       } else {
477         lastChar = pattern[index];
478       }
479       ++index;
480     } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
481       StringBuffer posixSet = new StringBuffer();
482       index = getPosixSet(pattern,index+1,posixSet);
483       int posixId = RETokenPOSIX.intValue(posixSet.toString());
484       if (posixId != -1)
485         options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
486     } else {
487       if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
488       lastChar = ch;
489     }
490     if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
491   } // while in list
492   // Out of list, index is one past ']'
493       
494   if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
495       
496   // Create a new RETokenOneOf
497   addToken(currentToken);
498   options.trimToSize();
499   currentToken = new RETokenOneOf(subIndex,options,negative);
500       }
501 
502       // SUBEXPRESSIONS
503       //  (...) | \(...\) depending on RE_NO_BK_PARENS
504 
505       else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
506   boolean pure = false;
507   boolean comment = false;
508         boolean lookAhead = false;
509         boolean negativelh = false;
510   if ((index+1 < pLength) && (pattern[index] == '?')) {
511     switch (pattern[index+1]) {
512           case '!':
513             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
514               pure = true;
515               negativelh = true;
516               lookAhead = true;
517               index += 2;
518             }
519             break;
520           case '=':
521             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
522               pure = true;
523               lookAhead = true;
524               index += 2;
525             }
526             break;
527     case ':':
528       if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
529         pure = true;
530         index += 2;
531       }
532       break;
533     case '#':
534       if (syntax.get(RESyntax.RE_COMMENTS)) {
535         comment = true;
536       }
537       break;
538           default:
539             throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
540     }
541   }
542 
543   if (index >= pLength) {
544       throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
545   }
546 
547   // find end of subexpression
548   int endIndex = index;
549   int nextIndex = index;
550   int nested = 0;
551 
552   while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
553     && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
554     if ((endIndex = nextIndex) >= pLength)
555       throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
556     else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
557       nested++;
558     else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
559       nested--;
560 
561   // endIndex is now position at a ')','\)' 
562   // nextIndex is end of string or position after ')' or '\)'
563 
564   if (comment) index = nextIndex;
565   else { // not a comment
566     // create RE subexpression as token.
567     addToken(currentToken);
568     if (!pure) {
569       numSubs++;
570     }
571 
572     int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
573     currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
574     numSubs += ((RE) currentToken).getNumSubs();
575 
576           if (lookAhead) {
577         currentToken = new RETokenLookAhead(currentToken,negativelh);
578     }
579 
580     index = nextIndex;
581   } // not a comment
582       } // subexpression
583     
584       // UNMATCHED RIGHT PAREN
585       // ) or \) throw exception if
586       // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
587       else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
588   throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
589       }
590 
591       // START OF LINE OPERATOR
592       //  ^
593 
594       else if ((unit.ch == '^') && !(unit.bk || quot)) {
595   addToken(currentToken);
596   currentToken = null;
597   addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
598       }
599 
600       // END OF LINE OPERATOR
601       //  $
602 
603       else if ((unit.ch == '$') && !(unit.bk || quot)) {
604   addToken(currentToken);
605   currentToken = null;
606   addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
607       }
608 
609       // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
610       //  .
611 
612       else if ((unit.ch == '.') && !(unit.bk || quot)) {
613   addToken(currentToken);
614   currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
615       }
616 
617       // ZERO-OR-MORE REPEAT OPERATOR
618       //  *
619 
620       else if ((unit.ch == '*') && !(unit.bk || quot)) {
621   if (currentToken == null)
622           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
623   if (currentToken instanceof RETokenRepeated)
624           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
625   if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
626     throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
627   if (currentToken.getMinimumLength() == 0)
628     throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
629   currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
630       }
631 
632       // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR
633       //  + | \+ depending on RE_BK_PLUS_QM
634       //  not available if RE_LIMITED_OPS is set
635 
636       else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
637   if (currentToken == null)
638           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
639   
640   // Check for possessive matching on RETokenRepeated
641   if (currentToken instanceof RETokenRepeated) {
642     RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
643     if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
644       tokenRep.makePossessive();
645     else
646       throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
647 
648   }
649   else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
650     throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
651   else if (currentToken.getMinimumLength() == 0)
652     throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
653   else
654     currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
655       }
656 
657       // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
658       //  ? | \? depending on RE_BK_PLUS_QM
659       //  not available if RE_LIMITED_OPS is set
660       //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier
661 
662       else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
663   if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
664 
665   // Check for stingy matching on RETokenRepeated
666   if (currentToken instanceof RETokenRepeated) {
667     RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
668     if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
669       tokenRep.makeStingy();
670     else
671       throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
672   }
673   else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
674     throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
675   else
676     currentToken = setRepeated(currentToken,0,1,index);
677       }
678   
679       // BACKREFERENCE OPERATOR
680       //  \1 \2 ... \9
681       // not available if RE_NO_BK_REFS is set
682 
683       else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
684   addToken(currentToken);
685   currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
686       }
687 
688       // START OF STRING OPERATOR
689       //  \A if RE_STRING_ANCHORS is set
690       
691       else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
692   addToken(currentToken);
693   currentToken = new RETokenStart(subIndex,null);
694       }
695 
696       // WORD BREAK OPERATOR
697       //  \b if ????
698 
699       else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
700     addToken(currentToken);
701     currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
702       } 
703 
704       // WORD BEGIN OPERATOR 
705       //  \< if ????
706       else if (unit.bk && (unit.ch == '<')) {
707     addToken(currentToken);
708     currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
709       } 
710 
711       // WORD END OPERATOR 
712       //  \> if ????
713       else if (unit.bk && (unit.ch == '>')) {
714     addToken(currentToken);
715     currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
716       } 
717 
718       // NON-WORD BREAK OPERATOR
719       // \B if ????
720 
721       else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
722     addToken(currentToken);
723     currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
724       } 
725 
726       
727       // DIGIT OPERATOR
728       //  \d if RE_CHAR_CLASS_ESCAPES is set
729       
730       else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
731   addToken(currentToken);
732   currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
733       }
734 
735       // NON-DIGIT OPERATOR
736       //  \D
737 
738   else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
739     addToken(currentToken);
740     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
741   }
742 
743   // NEWLINE ESCAPE
744         //  \n
745 
746   else if (unit.bk && (unit.ch == 'n')) {
747     addToken(currentToken);
748     currentToken = new RETokenChar(subIndex,'\n',false);
749   }
750 
751   // RETURN ESCAPE
752         //  \r
753 
754   else if (unit.bk && (unit.ch == 'r')) {
755     addToken(currentToken);
756     currentToken = new RETokenChar(subIndex,'\r',false);
757   }
758 
759   // WHITESPACE OPERATOR
760         //  \s if RE_CHAR_CLASS_ESCAPES is set
761 
762   else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
763     addToken(currentToken);
764     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
765   }
766 
767   // NON-WHITESPACE OPERATOR
768         //  \S
769 
770   else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
771     addToken(currentToken);
772     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
773   }
774 
775   // TAB ESCAPE
776         //  \t
777 
778   else if (unit.bk && (unit.ch == 't')) {
779     addToken(currentToken);
780     currentToken = new RETokenChar(subIndex,'\t',false);
781   }
782 
783   // ALPHANUMERIC OPERATOR
784         //  \w
785 
786   else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
787     addToken(currentToken);
788     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
789   }
790 
791   // NON-ALPHANUMERIC OPERATOR
792         //  \W
793 
794   else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
795     addToken(currentToken);
796     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
797   }
798 
799   // END OF STRING OPERATOR
800         //  \Z
801 
802   else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
803     addToken(currentToken);
804     currentToken = new RETokenEnd(subIndex,null);
805   }
806 
807   // NON-SPECIAL CHARACTER (or escape to make literal)
808         //  c | \* for example
809 
810   else {  // not a special character
811     addToken(currentToken);
812     currentToken = new RETokenChar(subIndex,unit.ch,insens);
813   } 
814       } // end while
815 
816     // Add final buffered token and an EndSub marker
817     addToken(currentToken);
818       
819     if (branches != null) {
820   branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
821   branches.trimToSize(); // compact the Vector
822   minimumLength = 0;
823   firstToken = lastToken = null;
824   addToken(new RETokenOneOf(subIndex,branches,false));
825     } 
826     else addToken(new RETokenEndSub(subIndex));
827 
828   }
829 
830   private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
831     unit.ch = input[index++];
832     unit.bk = (unit.ch == '\\'
833          && (!quot || index >= input.length || input[index] == 'E'));
834     if (unit.bk)
835       if (index < input.length)
836   unit.ch = input[index++];
837       else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
838     return index;
839   }
840 
841   /**
842    * Checks if the regular expression matches the input in its entirety.
843    *
844    * @param input The input text.
845    */
846   public boolean isMatch(Object input) {
847     return isMatch(input,0,0);
848   }
849   
850   /**
851    * Checks if the input string, starting from index, is an exact match of
852    * this regular expression.
853    *
854    * @param input The input text.
855    * @param index The offset index at which the search should be begin.
856    */
857   public boolean isMatch(Object input,int index) {
858     return isMatch(input,index,0);
859   }
860   
861 
862   /**
863    * Checks if the input, starting from index and using the specified
864    * execution flags, is an exact match of this regular expression.
865    *
866    * @param input The input text.
867    * @param index The offset index at which the search should be begin.
868    * @param eflags The logical OR of any execution flags above.
869    */
870   public boolean isMatch(Object input,int index,int eflags) {
871     return isMatchImpl(makeCharIndexed(input,index),index,eflags);
872   }
873 
874   private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
875     if (firstToken == null)  // Trivial case
876       return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
877     REMatch m = new REMatch(numSubs, index, eflags);
878     if (firstToken.match(input, m)) {
879   while (m != null) {
880       if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
881     return true;
882       }
883       m = m.next;
884   }
885     }
886     return false;
887   }
888     
889   /**
890    * Returns the maximum number of subexpressions in this regular expression.
891    * If the expression contains branches, the value returned will be the
892    * maximum subexpressions in any of the branches.
893    */
894   public int getNumSubs() {
895     return numSubs;
896   }
897 
898   // Overrides REToken.setUncle
899   void setUncle(REToken uncle) {
900       if (lastToken != null) {
901     lastToken.setUncle(uncle);
902       } else super.setUncle(uncle); // to deal with empty subexpressions
903   }
904 
905   // Overrides REToken.chain
906 
907   boolean chain(REToken next) {
908     super.chain(next);
909     setUncle(next);
910     return true;
911   }
912 
913   /**
914    * Returns the minimum number of characters that could possibly
915    * constitute a match of this regular expression.
916    */
917   public int getMinimumLength() {
918       return minimumLength;
919   }
920 
921   /**
922    * Returns an array of all matches found in the input.
923    *
924    * If the regular expression allows the empty string to match, it will
925    * substitute matches at all positions except the end of the input.
926    *
927    * @param input The input text.
928    * @return a non-null (but possibly zero-length) array of matches
929    */
930   public REMatch[] getAllMatches(Object input) {
931     return getAllMatches(input,0,0);
932   }
933 
934   /**
935    * Returns an array of all matches found in the input,
936    * beginning at the specified index position.
937    *
938    * If the regular expression allows the empty string to match, it will
939    * substitute matches at all positions except the end of the input.
940    *
941    * @param input The input text.
942    * @param index The offset index at which the search should be begin.
943    * @return a non-null (but possibly zero-length) array of matches
944    */
945   public REMatch[] getAllMatches(Object input, int index) {
946     return getAllMatches(input,index,0);
947   }
948 
949   /**
950    * Returns an array of all matches found in the input string,
951    * beginning at the specified index position and using the specified
952    * execution flags.
953    *
954    * If the regular expression allows the empty string to match, it will
955    * substitute matches at all positions except the end of the input.
956    *
957    * @param input The input text.
958    * @param index The offset index at which the search should be begin.
959    * @param eflags The logical OR of any execution flags above.
960    * @return a non-null (but possibly zero-length) array of matches
961    */
962   public REMatch[] getAllMatches(Object input, int index, int eflags) {
963     return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
964   }
965 
966   // this has been changed since 1.03 to be non-overlapping matches
967   private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
968     Vector all = new Vector();
969     REMatch m = null;
970     while ((m = getMatchImpl(input,index,eflags,null)) != null) {
971       all.addElement(m);
972       index = m.getEndIndex();
973       if (m.end[0] == 0) {   // handle pathological case of zero-length match
974   index++;
975   input.move(1);
976       } else {
977   input.move(m.end[0]);
978       }
979       if (!input.isValid()) break;
980     }
981     REMatch[] mset = new REMatch[all.size()];
982     all.copyInto(mset);
983     return mset;
984   }
985   
986     /* Implements abstract method REToken.match() */
987     boolean match(CharIndexed input, REMatch mymatch) { 
988   if (firstToken == null) return next(input, mymatch);
989 
990   // Note the start of this subexpression
991   mymatch.start[subIndex] = mymatch.index;
992 
993   return firstToken.match(input, mymatch);
994     }
995   
996   /**
997    * Returns the first match found in the input.  If no match is found,
998    * null is returned.
999    *
1000   * @param input The input text.
1001   * @return An REMatch instance referencing the match, or null if none.
1002   */
1003  public REMatch getMatch(Object input) {
1004    return getMatch(input,0,0);
1005  }
1006  
1007  /**
1008   * Returns the first match found in the input, beginning
1009   * the search at the specified index.  If no match is found,
1010   * returns null.
1011   *
1012   * @param input The input text.
1013   * @param index The offset within the text to begin looking for a match.
1014   * @return An REMatch instance referencing the match, or null if none.
1015   */
1016  public REMatch getMatch(Object input, int index) {
1017    return getMatch(input,index,0);
1018  }
1019  
1020  /**
1021   * Returns the first match found in the input, beginning
1022   * the search at the specified index, and using the specified
1023   * execution flags.  If no match is found, returns null.
1024   *
1025   * @param input The input text.
1026   * @param index The offset index at which the search should be begin.
1027   * @param eflags The logical OR of any execution flags above.
1028   * @return An REMatch instance referencing the match, or null if none.
1029   */
1030  public REMatch getMatch(Object input, int index, int eflags) {
1031    return getMatch(input,index,eflags,null);
1032  }
1033
1034  /**
1035   * Returns the first match found in the input, beginning the search
1036   * at the specified index, and using the specified execution flags.
1037   * If no match is found, returns null.  If a StringBuffer is
1038   * provided and is non-null, the contents of the input text from the
1039   * index to the beginning of the match (or to the end of the input,
1040   * if there is no match) are appended to the StringBuffer.
1041   *
1042   * @param input The input text.
1043   * @param index The offset index at which the search should be begin.
1044   * @param eflags The logical OR of any execution flags above.
1045   * @param buffer The StringBuffer to save pre-match text in.
1046   * @return An REMatch instance referencing the match, or null if none.  */
1047  public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1048    return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1049  }
1050
1051  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1052      // Create a new REMatch to hold results
1053      REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1054      do {
1055    // Optimization: check if anchor + minimumLength > length
1056    if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1057        if (match(input, mymatch)) {
1058      // Find longest match of them all to observe leftmost longest
1059      REMatch longest = mymatch;
1060      while ((mymatch = mymatch.next) != null) {
1061          if (mymatch.index > longest.index) {
1062        longest = mymatch;
1063          }
1064      }
1065      
1066      longest.end[0] = longest.index;
1067      longest.finish(input);
1068      return longest;
1069        }
1070    }
1071    mymatch.clear(++anchor);
1072    // Append character to buffer if needed
1073    if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1074        buffer.append(input.charAt(0));
1075    }
1076      } while (input.move(1));
1077      
1078      // Special handling at end of input for e.g. "$"
1079      if (minimumLength == 0) {
1080    if (match(input, mymatch)) {
1081        mymatch.finish(input);
1082        return mymatch;
1083    }
1084      }
1085
1086      return null;
1087  }
1088
1089  /**
1090   * Returns an REMatchEnumeration that can be used to iterate over the
1091   * matches found in the input text.
1092   *
1093   * @param input The input text.
1094   * @return A non-null REMatchEnumeration instance.
1095   */
1096  public REMatchEnumeration getMatchEnumeration(Object input) {
1097    return getMatchEnumeration(input,0,0);
1098  }
1099
1100
1101  /**
1102   * Returns an REMatchEnumeration that can be used to iterate over the
1103   * matches found in the input text.
1104   *
1105   * @param input The input text.
1106   * @param index The offset index at which the search should be begin.
1107   * @return A non-null REMatchEnumeration instance, with its input cursor
1108   *  set to the index position specified.
1109   */
1110  public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1111    return getMatchEnumeration(input,index,0);
1112  }
1113
1114  /**
1115   * Returns an REMatchEnumeration that can be used to iterate over the
1116   * matches found in the input text.
1117   *
1118   * @param input The input text.
1119   * @param index The offset index at which the search should be begin.
1120   * @param eflags The logical OR of any execution flags above.
1121   * @return A non-null REMatchEnumeration instance, with its input cursor
1122   *  set to the index position specified.
1123   */
1124  public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1125    return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1126  }
1127
1128
1129  /**
1130   * Substitutes the replacement text for the first match found in the input.
1131   *
1132   * @param input The input text.
1133   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1134   * @return A String interpolating the substituted text.
1135   * @see REMatch#substituteInto
1136   */
1137  public String substitute(Object input,String replace) {
1138    return substitute(input,replace,0,0);
1139  }
1140
1141  /**
1142   * Substitutes the replacement text for the first match found in the input
1143   * beginning at the specified index position.  Specifying an index
1144   * effectively causes the regular expression engine to throw away the
1145   * specified number of characters. 
1146   *
1147   * @param input The input text.
1148   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1149   * @param index The offset index at which the search should be begin.
1150   * @return A String containing the substring of the input, starting
1151   *   at the index position, and interpolating the substituted text.
1152   * @see REMatch#substituteInto
1153   */
1154  public String substitute(Object input,String replace,int index) {
1155    return substitute(input,replace,index,0);
1156  }
1157
1158  /**
1159   * Substitutes the replacement text for the first match found in the input
1160   * string, beginning at the specified index position and using the
1161   * specified execution flags.
1162   *
1163   * @param input The input text.
1164   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1165   * @param index The offset index at which the search should be begin.
1166   * @param eflags The logical OR of any execution flags above.
1167   * @return A String containing the substring of the input, starting
1168   *   at the index position, and interpolating the substituted text.
1169   * @see REMatch#substituteInto
1170   */
1171  public String substitute(Object input,String replace,int index,int eflags) {
1172    return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1173  }
1174
1175  private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1176    StringBuffer buffer = new StringBuffer();
1177    REMatch m = getMatchImpl(input,index,eflags,buffer);
1178    if (m==null) return buffer.toString();
1179    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1180       replace : m.substituteInto(replace) );
1181    if (input.move(m.end[0])) {
1182      do {
1183  buffer.append(input.charAt(0));
1184      } while (input.move(1));
1185    }
1186    return buffer.toString();
1187  }
1188  
1189  /**
1190   * Substitutes the replacement text for each non-overlapping match found 
1191   * in the input text.
1192   *
1193   * @param input The input text.
1194   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1195   * @return A String interpolating the substituted text.
1196   * @see REMatch#substituteInto
1197   */
1198  public String substituteAll(Object input,String replace) {
1199    return substituteAll(input,replace,0,0);
1200  }
1201
1202  /**
1203   * Substitutes the replacement text for each non-overlapping match found 
1204   * in the input text, starting at the specified index.
1205   *
1206   * If the regular expression allows the empty string to match, it will
1207   * substitute matches at all positions except the end of the input.
1208   *
1209   * @param input The input text.
1210   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1211   * @param index The offset index at which the search should be begin.
1212   * @return A String containing the substring of the input, starting
1213   *   at the index position, and interpolating the substituted text.
1214   * @see REMatch#substituteInto
1215   */
1216  public String substituteAll(Object input,String replace,int index) {
1217    return substituteAll(input,replace,index,0);
1218  }
1219 
1220  /**
1221   * Substitutes the replacement text for each non-overlapping match found 
1222   * in the input text, starting at the specified index and using the
1223   * specified execution flags.
1224   *
1225   * @param input The input text.
1226   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1227   * @param index The offset index at which the search should be begin.
1228   * @param eflags The logical OR of any execution flags above.
1229   * @return A String containing the substring of the input, starting
1230   *   at the index position, and interpolating the substituted text.
1231   * @see REMatch#substituteInto
1232   */
1233  public String substituteAll(Object input,String replace,int index,int eflags) {
1234    return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1235  }
1236
1237  private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1238    StringBuffer buffer = new StringBuffer();
1239    REMatch m;
1240    while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1241  buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1242           replace : m.substituteInto(replace) );
1243      index = m.getEndIndex();
1244      if (m.end[0] == 0) {
1245  char ch = input.charAt(0);
1246  if (ch != CharIndexed.OUT_OF_BOUNDS) 
1247      buffer.append(ch);
1248  input.move(1);
1249      } else {
1250    input.move(m.end[0]);
1251      }
1252
1253      if (!input.isValid()) break;
1254    }
1255    return buffer.toString();
1256  }
1257  
1258  /* Helper function for constructor */
1259  private void addToken(REToken next) {
1260    if (next == null) return;
1261    minimumLength += next.getMinimumLength();
1262    if (firstToken == null) {
1263  lastToken = firstToken = next;
1264    } else {
1265      // if chain returns false, it "rejected" the token due to
1266      // an optimization, and next was combined with lastToken
1267      if (lastToken.chain(next)) {
1268    lastToken = next;
1269      }
1270    }
1271  }
1272
1273  private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1274    if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1275    return new RETokenRepeated(current.subIndex,current,min,max);
1276  }
1277
1278  private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1279    // Precondition: pattern[index-1] == ':'
1280    // we will return pos of closing ']'.
1281    int i;
1282    for (i=index; i<(pattern.length-1); i++) {
1283      if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1284  return i+2;
12