Source code: org/apache/oro/text/awk/AwkMatcher.java
1 /*
2 * $Id: AwkMatcher.java,v 1.11 2003/11/07 20:16:24 dfs Exp $
3 *
4 * ====================================================================
5 * The Apache Software License, Version 1.1
6 *
7 * Copyright (c) 2000 The Apache Software Foundation. All rights
8 * reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 *
22 * 3. The end-user documentation included with the redistribution,
23 * if any, must include the following acknowledgment:
24 * "This product includes software developed by the
25 * Apache Software Foundation (http://www.apache.org/)."
26 * Alternately, this acknowledgment may appear in the software itself,
27 * if and wherever such third-party acknowledgments normally appear.
28 *
29 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
30 * must not be used to endorse or promote products derived from this
31 * software without prior written permission. For written
32 * permission, please contact apache@apache.org.
33 *
34 * 5. Products derived from this software may not be called "Apache"
35 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
36 * name, without prior written permission of the Apache Software Foundation.
37 *
38 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49 * SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This software consists of voluntary contributions made by many
53 * individuals on behalf of the Apache Software Foundation. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58
59 package org.apache.oro.text.awk;
60
61 import java.io.*;
62
63 import org.apache.oro.text.regex.*;
64
65 /**
66 * The AwkMatcher class is used to match regular expressions
67 * (conforming to the Awk regular expression syntax) generated by
68 * AwkCompiler. AwkMatcher only supports 8-bit ASCII. Any attempt
69 * to match Unicode values greater than 255 will result in undefined
70 * behavior. AwkMatcher finds true leftmost-longest matches, so
71 * you must take care with how you formulate your regular expression
72 * to avoid matching more than you really want.
73 * <p>
74 * It is important for you to remember that AwkMatcher does not save
75 * parenthesized sub-group information. Therefore the number of groups
76 * saved in a MatchResult produced by AwkMatcher will always be 1.
77 *
78 * @version @version@
79 * @since 1.0
80 * @see org.apache.oro.text.regex.PatternMatcher
81 * @see AwkCompiler
82 */
83 public final class AwkMatcher implements PatternMatcher {
// Buffer offset at which the next _search() pass starts. The contains()
// methods seed it and _search() moves it past each match it reports.
private int __lastMatchedBufferOffset;
// Outcome of the most recent match attempt; null when that attempt failed.
private AwkMatchResult __lastMatchResult = null;
// __scratchBuffer wraps caller-supplied char[] input so it can be fed to the
// stream-oriented matching code; __streamSearchBuffer is whichever input the
// matching routines are currently reading from.
private AwkStreamInput __scratchBuffer, __streamSearchBuffer;
// Pattern used by the match operation currently in progress.
private AwkPattern __awkPattern;
// [0]: start offset handed to __streamMatchPrefix(), which also rewrites it.
// [1]: set by __streamMatchPrefix() to lastMatchedOffset - 1.
private int __offsets[] = new int[2];

/**
 * A kluge variable to make PatternMatcherInput matches work when
 * their begin offset is non-zero.  This kluge is caused by the
 * misguided notion that AwkStreamInput could be overloaded to do
 * both stream and fixed buffer matches.  The whole input representation
 * scheme has to be scrapped and redone. -- dfs 2001/07/10
 */
private int __beginOffset;
98
/**
 * Creates a matcher whose internal scratch buffer is flagged as having
 * already reached end of stream, since it only ever wraps in-memory
 * input.
 */
public AwkMatcher() {
  AwkStreamInput scratch = new AwkStreamInput();
  scratch._endOfStreamReached = true;
  __scratchBuffer = scratch;
}
103
104 /**
105 * Determines if a prefix of a string (represented as a char[])
106 * matches a given pattern, starting from a given offset into the string.
107 * If a prefix of the string matches the pattern, a MatchResult instance
108 * representing the match is made accesible via
109 * {@link #getMatch()}.
110 * <p>
111 * This method is useful for certain common token identification tasks
112 * that are made more difficult without this functionality.
113 * <p>
114 * @param input The char[] to test for a prefix match.
115 * @param pattern The Pattern to be matched.
116 * @param offset The offset at which to start searching for the prefix.
117 * @return True if input matches pattern, false otherwise.
118 */
119 // I reimplemented this method in terms of streammatchesPrefix
120 // to reduce the code size. This is not very elegant and
121 // reduces performance by a small degree.
122 public boolean matchesPrefix(char[] input, Pattern pattern, int offset){
123 int result = -1;
124
125 __awkPattern = (AwkPattern)pattern;
126
127 __scratchBuffer._buffer = input;
128 __scratchBuffer._bufferSize = input.length;
129 __scratchBuffer._bufferOffset = __beginOffset = 0;
130 __scratchBuffer._endOfStreamReached = true;
131 __streamSearchBuffer = __scratchBuffer;
132 __offsets[0] = offset;
133 try {
134 result = __streamMatchPrefix();
135 } catch(IOException e){
136 // Don't do anything because we're not doing any I/O
137 result = -1;
138 }
139
140 if(result < 0) {
141 __lastMatchResult = null;
142 return false;
143 }
144
145 __lastMatchResult =
146 new AwkMatchResult(new String(input, 0, result), offset);
147
148 return true;
149 }
150
151
152 /**
153 * Determines if a prefix of a string (represented as a char[])
154 * matches a given pattern.
155 * If a prefix of the string matches the pattern, a MatchResult instance
156 * representing the match is made accesible via
157 * {@link #getMatch()}.
158 * <p>
159 * This method is useful for certain common token identification tasks
160 * that are made more difficult without this functionality.
161 * <p>
162 * @param input The char[] to test for a prefix match.
163 * @param pattern The Pattern to be matched.
164 * @return True if input matches pattern, false otherwise.
165 */
166 public boolean matchesPrefix(char[] input, Pattern pattern){
167 return matchesPrefix(input, pattern, 0);
168 }
169
170
171 /**
172 * Determines if a prefix of a string matches a given pattern.
173 * If a prefix of the string matches the pattern, a MatchResult instance
174 * representing the match is made accesible via
175 * {@link #getMatch()}.
176 * <p>
177 * This method is useful for certain common token identification tasks
178 * that are made more difficult without this functionality.
179 * <p>
180 * @param input The String to test for a prefix match.
181 * @param pattern The Pattern to be matched.
182 * @return True if input matches pattern, false otherwise.
183 */
184 public boolean matchesPrefix(String input, Pattern pattern) {
185 return matchesPrefix(input.toCharArray(), pattern, 0);
186 }
187
188
189 /**
190 * Determines if a prefix of a PatternMatcherInput instance
191 * matches a given pattern. If there is a match, a MatchResult instance
192 * representing the match is made accesible via
193 * {@link #getMatch()}. Unlike the
194 * {@link #contains(PatternMatcherInput, Pattern)}
195 * method, the current offset of the PatternMatcherInput argument
196 * is not updated. You should remember that the region starting
197 * from the begin offset of the PatternMatcherInput will be
198 * tested for a prefix match.
199 * <p>
200 * This method is useful for certain common token identification tasks
201 * that are made more difficult without this functionality.
202 * <p>
203 * @param input The PatternMatcherInput to test for a prefix match.
204 * @param pattern The Pattern to be matched.
205 * @return True if input matches pattern, false otherwise.
206 */
207 public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern){
208 int result = -1;
209
210 __awkPattern = (AwkPattern)pattern;
211 __scratchBuffer._buffer = input.getBuffer();
212 __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
213 __offsets[0] = input.getCurrentOffset();
214
215 __scratchBuffer._bufferSize = input.length();
216 __scratchBuffer._endOfStreamReached = true;
217 __streamSearchBuffer = __scratchBuffer;
218 try {
219 result = __streamMatchPrefix();
220 } catch(IOException e) {
221 // Don't do anything because we're not doing any I/O
222 result = -1;
223 }
224
225 if(result < 0) {
226 __lastMatchResult = null;
227 return false;
228 }
229
230 __lastMatchResult =
231 new AwkMatchResult(new String(__scratchBuffer._buffer, __offsets[0],
232 result), __offsets[0]);
233
234 return true;
235 }
236
237
238
239 /**
240 * Determines if a string (represented as a char[]) exactly
241 * matches a given pattern. If
242 * there is an exact match, a MatchResult instance
243 * representing the match is made accesible via
244 * {@link #getMatch()}. The pattern must be
245 * an AwkPattern instance, otherwise a ClassCastException will
246 * be thrown. You are not required to, and indeed should NOT try to
247 * (for performance reasons), catch a ClassCastException because it
248 * will never be thrown as long as you use an AwkPattern as the pattern
249 * parameter.
250 * <p>
251 * @param input The char[] to test for an exact match.
252 * @param pattern The AwkPattern to be matched.
253 * @return True if input matches pattern, false otherwise.
254 * @exception ClassCastException If a Pattern instance other than an
255 * AwkPattern is passed as the pattern parameter.
256 */
257 public boolean matches(char[] input, Pattern pattern) {
258 int result = -1;
259
260 __awkPattern = (AwkPattern)pattern;
261 __scratchBuffer._buffer = input;
262 __scratchBuffer._bufferSize = input.length;
263 __scratchBuffer._bufferOffset = __beginOffset = 0;
264 __scratchBuffer._endOfStreamReached = true;
265 __streamSearchBuffer = __scratchBuffer;
266 __offsets[0] = 0;
267 try {
268 result = __streamMatchPrefix();
269 } catch(IOException e){
270 // Don't do anything because we're not doing any I/O
271 result = -1;
272 }
273
274 if(result != input.length) {
275 __lastMatchResult = null;
276 return false;
277 }
278
279 __lastMatchResult =
280 new AwkMatchResult(new String(input, 0, result), 0);
281
282 return true;
283 }
284
285
286
287
288 /**
289 * Determines if a string exactly matches a given pattern. If
290 * there is an exact match, a MatchResult instance
291 * representing the match is made accesible via
292 * {@link #getMatch()}. The pattern must be
293 * a AwkPattern instance, otherwise a ClassCastException will
294 * be thrown. You are not required to, and indeed should NOT try to
295 * (for performance reasons), catch a ClassCastException because it
296 * will never be thrown as long as you use an AwkPattern as the pattern
297 * parameter.
298 * <p>
299 * @param input The String to test for an exact match.
300 * @param pattern The AwkPattern to be matched.
301 * @return True if input matches pattern, false otherwise.
302 * @exception ClassCastException If a Pattern instance other than an
303 * AwkPattern is passed as the pattern parameter.
304 */
305 public boolean matches(String input, Pattern pattern){
306 return matches(input.toCharArray(), pattern);
307 }
308
309
310 /**
311 * Determines if the contents of a PatternMatcherInput instance
312 * exactly matches a given pattern. If
313 * there is an exact match, a MatchResult instance
314 * representing the match is made accesible via
315 * {@link #getMatch()}. Unlike the
316 * {@link #contains(PatternMatcherInput, Pattern)}
317 * method, the current offset of the PatternMatcherInput argument
318 * is not updated. You should remember that the region between
319 * the begin and end offsets of the PatternMatcherInput will be
320 * tested for an exact match.
321 * <p>
322 * The pattern must be an AwkPattern instance, otherwise a
323 * ClassCastException will be thrown. You are not required to, and
324 * indeed should NOT try to (for performance reasons), catch a
325 * ClassCastException because it will never be thrown as long as you use
326 * an AwkPattern as the pattern parameter.
327 * <p>
328 * @param input The PatternMatcherInput to test for a match.
329 * @param pattern The AwkPattern to be matched.
330 * @return True if input matches pattern, false otherwise.
331 * @exception ClassCastException If a Pattern instance other than an
332 * AwkPattern is passed as the pattern parameter.
333 */
334 public boolean matches(PatternMatcherInput input, Pattern pattern){
335 int result = -1;
336
337 __awkPattern = (AwkPattern)pattern;
338 __scratchBuffer._buffer = input.getBuffer();
339 __scratchBuffer._bufferSize = input.length();
340 __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
341 __offsets[0] = input.getBeginOffset();
342 __scratchBuffer._endOfStreamReached = true;
343 __streamSearchBuffer = __scratchBuffer;
344 try {
345 result = __streamMatchPrefix();
346 } catch(IOException e){
347 // Don't do anything because we're not doing any I/O
348 result = -1;
349 }
350
351 if(result != __scratchBuffer._bufferSize) {
352 __lastMatchResult = null;
353 return false;
354 }
355
356 __lastMatchResult =
357 new AwkMatchResult(new String(__scratchBuffer._buffer, __offsets[0],
358 __scratchBuffer._bufferSize), __offsets[0]);
359
360 return true;
361 }
362
363
364
365 /**
366 * Determines if a string (represented as a char[]) contains a pattern.
367 * If the pattern is
368 * matched by some substring of the input, a MatchResult instance
369 * representing the <b> first </b> such match is made acessible via
370 * {@link #getMatch()}. If you want to access
371 * subsequent matches you should either use a PatternMatcherInput object
372 * or use the offset information in the MatchResult to create a substring
373 * representing the remaining input. Using the MatchResult offset
374 * information is the recommended method of obtaining the parts of the
375 * string preceeding the match and following the match.
376 * <p>
377 * The pattern must be an AwkPattern instance, otherwise a
378 * ClassCastException will be thrown. You are not required to, and
379 * indeed should NOT try to (for performance reasons), catch a
380 * ClassCastException because it will never be thrown as long as you use
381 * an AwkPattern as the pattern parameter.
382 * <p>
383 * @param input The char[] to test for a match.
384 * @param pattern The AwkPattern to be matched.
385 * @return True if the input contains a pattern match, false otherwise.
386 * @exception ClassCastException If a Pattern instance other than an
387 * AwkPattern is passed as the pattern parameter.
388 */
389 public boolean contains(char[] input, Pattern pattern) {
390 __awkPattern = (AwkPattern)pattern;
391
392 // Begin anchor requires match occur at beginning of input
393 if(__awkPattern._hasBeginAnchor && !__awkPattern._fastMap[input[0]]){
394 __lastMatchResult = null;
395 return false;
396 }
397
398 __scratchBuffer._buffer = input;
399 __scratchBuffer._bufferSize = input.length;
400 __scratchBuffer._bufferOffset = __beginOffset = 0;
401 __scratchBuffer._endOfStreamReached = true;
402 __streamSearchBuffer = __scratchBuffer;
403 __lastMatchedBufferOffset = 0;
404 try {
405 _search();
406 } catch(IOException e) {
407 // do nothing
408 }
409 return (__lastMatchResult != null);
410 }
411
412
413 /**
414 * Determines if a string contains a pattern. If the pattern is
415 * matched by some substring of the input, a MatchResult instance
416 * representing the <b> first </b> such match is made acessible via
417 * {@link #getMatch()}. If you want to access
418 * subsequent matches you should either use a PatternMatcherInput object
419 * or use the offset information in the MatchResult to create a substring
420 * representing the remaining input. Using the MatchResult offset
421 * information is the recommended method of obtaining the parts of the
422 * string preceeding the match and following the match.
423 * <p>
424 * The pattern must be an AwkPattern instance, otherwise a
425 * ClassCastException will be thrown. You are not required to, and
426 * indeed should NOT try to (for performance reasons), catch a
427 * ClassCastException because it will never be thrown as long as you use
428 * an AwkPattern as the pattern parameter.
429 * <p>
430 * @param input The String to test for a match.
431 * @param pattern The AwkPattern to be matched.
432 * @return True if the input contains a pattern match, false otherwise.
433 * @exception ClassCastException If a Pattern instance other than an
434 * AwkPattern is passed as the pattern parameter.
435 */
436 public boolean contains(String input, Pattern pattern){
437 return contains(input.toCharArray(), pattern);
438 }
439
440
441
442 /**
443 * Determines if the contents of a PatternMatcherInput, starting from the
444 * current offset of the input contains a pattern.
445 * If a pattern match is found, a MatchResult
446 * instance representing the <b>first</b> such match is made acessible via
447 * {@link #getMatch()}. The current offset of the
448 * PatternMatcherInput is set to the offset corresponding to the end
449 * of the match, so that a subsequent call to this method will continue
450 * searching where the last call left off. You should remember that the
451 * region between the begin and end offsets of the PatternMatcherInput are
452 * considered the input to be searched, and that the current offset
453 * of the PatternMatcherInput reflects where a search will start from.
454 * Matches extending beyond the end offset of the PatternMatcherInput
455 * will not be matched. In other words, a match must occur entirely
456 * between the begin and end offsets of the input. See
457 * {@link org.apache.oro.text.regex.PatternMatcherInput PatternMatcherInput}
458 * for more details.
459 * <p>
460 * As a side effect, if a match is found, the PatternMatcherInput match
461 * offset information is updated. See the PatternMatcherInput
462 * {@link org.apache.oro.text.regex.PatternMatcherInput#setMatchOffsets
463 * setMatchOffsets(int, int)} method for more details.
464 * <p>
465 * The pattern must be an AwkPattern instance, otherwise a
466 * ClassCastException will be thrown. You are not required to, and
467 * indeed should NOT try to (for performance reasons), catch a
468 * ClassCastException because it will never be thrown as long as you use
469 * an AwkPattern as the pattern parameter.
470 * <p>
471 * This method is usually used in a loop as follows:
472 * <blockquote><pre>
473 * PatternMatcher matcher;
474 * PatternCompiler compiler;
475 * Pattern pattern;
476 * PatternMatcherInput input;
477 * MatchResult result;
478 *
479 * compiler = new AwkCompiler();
480 * matcher = new AwkMatcher();
481 *
482 * try {
483 * pattern = compiler.compile(somePatternString);
484 * } catch(MalformedPatternException e) {
485 * System.err.println("Bad pattern.");
486 * System.err.println(e.getMessage());
487 * return;
488 * }
489 *
490 * input = new PatternMatcherInput(someStringInput);
491 *
492 * while(matcher.contains(input, pattern)) {
493 * result = matcher.getMatch();
494 * // Perform whatever processing on the result you want.
495 * }
496 *
497 * </pre></blockquote>
498 * <p>
499 * @param input The PatternMatcherInput to test for a match.
500 * @param pattern The Pattern to be matched.
501 * @return True if the input contains a pattern match, false otherwise.
502 * @exception ClassCastException If a Pattern instance other than an
503 * AwkPattern is passed as the pattern parameter.
504 */
505 public boolean contains(PatternMatcherInput input, Pattern pattern) {
506 __awkPattern = (AwkPattern)pattern;
507 __scratchBuffer._buffer = input.getBuffer();
508 __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
509 __lastMatchedBufferOffset = input.getCurrentOffset();
510
511 // Begin anchor requires match occur at beginning of input
512 // No need to adjust current offset if no match found.
513 if(__awkPattern._hasBeginAnchor) {
514 if(__beginOffset != __lastMatchedBufferOffset ||
515 !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) {
516 __lastMatchResult = null;
517 return false;
518 }
519 }
520
521 __scratchBuffer._bufferSize = input.length();
522 __scratchBuffer._endOfStreamReached = true;
523 __streamSearchBuffer = __scratchBuffer;
524 try {
525 _search();
526 } catch(IOException e) {
527 // do nothing
528 }
529 input.setCurrentOffset(__lastMatchedBufferOffset);
530
531 if(__lastMatchResult == null)
532 return false;
533
534 input.setMatchOffsets(__lastMatchResult.beginOffset(0),
535 __lastMatchResult.endOffset(0));
536
537 return true;
538 }
539
540
541 /**
542 * Determines if the contents of an AwkStreamInput, starting from the
543 * current offset of the input contains a pattern.
544 * If a pattern match is found, a MatchResult
545 * instance representing the <b>first</b> such match is made acessible via
546 * {@link #getMatch()}. The current offset of the
547 * input stream is advanced to the end offset corresponding to the end
548 * of the match. Consequently a subsequent call to this method will continue
549 * searching where the last call left off.
550 * See {@link AwkStreamInput} for more details.
551 * <p>
552 * Note, patterns matching the null string do NOT match at end of input
553 * stream. This is different from the behavior you get from the other
554 * contains() methods.
555 * <p>
556 * The pattern must be an AwkPattern instance, otherwise a
557 * ClassCastException will be thrown. You are not required to, and
558 * indeed should NOT try to (for performance reasons), catch a
559 * ClassCastException because it will never be thrown as long as you use
560 * an AwkPattern as the pattern parameter.
561 * <p>
562 * This method is usually used in a loop as follows:
563 * <blockquote><pre>
564 * PatternMatcher matcher;
565 * PatternCompiler compiler;
566 * Pattern pattern;
567 * AwkStreamInput input;
568 * MatchResult result;
569 *
570 * compiler = new AwkCompiler();
571 * matcher = new AwkMatcher();
572 *
573 * try {
574 * pattern = compiler.compile(somePatternString);
575 * } catch(MalformedPatternException e) {
576 * System.err.println("Bad pattern.");
577 * System.err.println(e.getMessage());
578 * return;
579 * }
580 *
581 * input = new AwkStreamInput(
582 * new BufferedInputStream(new FileInputStream(someFileName)));
583 *
584 * while(matcher.contains(input, pattern)) {
585 * result = matcher.getMatch();
586 * // Perform whatever processing on the result you want.
587 * }
588 *
589 * </pre></blockquote>
590 * <p>
591 * @param input The PatternStreamInput to test for a match.
592 * @param pattern The Pattern to be matched.
593 * @return True if the input contains a pattern match, false otherwise.
594 * @exception ClassCastException If a Pattern instance other than an
595 * AwkPattern is passed as the pattern parameter.
596 */
597 public boolean contains(AwkStreamInput input, Pattern pattern)
598 throws IOException
599 {
600 __awkPattern = (AwkPattern)pattern;
601
602 // Begin anchor requires match occur at beginning of input
603 if(__awkPattern._hasBeginAnchor) {
604 // Do read here instead of in _search() so we can test first char
605 if(input._bufferOffset == 0) {
606 if(input.read() && !__awkPattern._fastMap[input._buffer[0]]) {
607 __lastMatchResult = null;
608 return false;
609 }
610 } else {
611 __lastMatchResult = null;
612 return false;
613 }
614 }
615
616 __lastMatchedBufferOffset = input._currentOffset;
617 __streamSearchBuffer = input;
618 __beginOffset = 0;
619 _search();
620 input._currentOffset = __lastMatchedBufferOffset;
621
622 if(__lastMatchResult != null) {
623 // Adjust match begin offset to be relative to beginning of stream.
624 __lastMatchResult._incrementMatchBeginOffset(input._bufferOffset);
625 return true;
626 }
627
628 return false;
629 }
630
631
/**
 * Walks the pattern's state table over __streamSearchBuffer, starting at
 * __offsets[0], and reports the longest match that begins there.
 * <p>
 * On return __offsets[0] holds the start offset (reset to 0 if the
 * buffer was refilled during the scan) and __offsets[1] holds
 * lastMatchedOffset - 1, which is the index of the last matched
 * character when a match was found.
 *
 * @return The number of characters matched; 0 when no end state was
 *         reached but the pattern matches the null string; a negative
 *         value when there is no match or an end anchor is not satisfied.
 * @exception IOException If refilling the buffer fails (presumably only
 *         _reallocate() performs I/O here -- confirm in AwkStreamInput).
 */
private int __streamMatchPrefix() throws IOException {
  int token, current = AwkPattern._START_STATE, lastState;
  int offset, initialOffset, maxOffset;
  int lastMatchedOffset = -1;
  int[] tstateArray;

  offset = initialOffset = __offsets[0];
  // One past the last buffer index that may be read.
  maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;

 test:
  while(offset < maxOffset) {
    token = __streamSearchBuffer._buffer[offset++];

    // A state number outside the table ends the scan (else branch below).
    if(current < __awkPattern._numStates) {
      lastState = current;
      tstateArray = __awkPattern._getStateArray(current);
      current = tstateArray[token];

      // A zero entry is treated as "transition not recorded yet": ask the
      // pattern to create it, then read the entry again.
      if(current == 0){
        __awkPattern._createNewState(lastState, token, tstateArray);
        current = tstateArray[token];
      }

      if(current == AwkPattern._INVALID_STATE){
        break test;
      } else if(__awkPattern._endStates.get(current)){
        // offset has already moved past token, so it marks the exclusive
        // end of the longest match seen so far.
        lastMatchedOffset = offset;
      }

      // Buffer exhausted: ask the input for more data, keeping the text
      // from initialOffset onward.
      if(offset == maxOffset){
        offset =
          __streamSearchBuffer._reallocate(initialOffset) + __beginOffset;

        maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;

        // If we're at the end of the stream, don't reset values
        if(offset != maxOffset){
          // More input arrived: re-express the offsets relative to a start
          // of 0 (presumably _reallocate() moved the pending text to the
          // front of the buffer -- confirm in AwkStreamInput).
          if(lastMatchedOffset != -1)
            lastMatchedOffset-=initialOffset;
          initialOffset = 0;
        }
      }
    } else
      break;
  }

  // Publish the (possibly reset) start and the last matched index.
  __offsets[0] = initialOffset;
  __offsets[1] = lastMatchedOffset - 1;

  // No end state reached, but the pattern accepts the empty string.
  if(lastMatchedOffset == -1 && __awkPattern._matchesNullString)
    return 0;

  // End anchor requires match occur at end of input
  if(__awkPattern._hasEndAnchor &&
     (!__streamSearchBuffer._endOfStreamReached ||
      lastMatchedOffset < __streamSearchBuffer._bufferSize + __beginOffset))
    return -1;

  // Negative when lastMatchedOffset is still -1.
  return (lastMatchedOffset - initialOffset);
}
692
693
694
/**
 * Scans __streamSearchBuffer from __lastMatchedBufferOffset for the
 * first position at which the pattern matches.  On success
 * __lastMatchResult holds the match and __lastMatchedBufferOffset is
 * moved past it; on failure __lastMatchResult is left null.
 *
 * @exception IOException If reading more input from the search buffer
 *         fails.
 */
void _search() throws IOException {
  int position, tokensMatched;

  __lastMatchResult = null;

  while(true){
    // Nothing left to scan in the current buffer contents.
    if(__lastMatchedBufferOffset >=
       __streamSearchBuffer._bufferSize + __beginOffset) {
      if(__streamSearchBuffer._endOfStreamReached){
        // Get rid of reference now that it should no longer be used.
        __streamSearchBuffer = null;
        return;
      } else {
        // Fetch more input; give up if none is available.
        if(!__streamSearchBuffer.read())
          return;
        __lastMatchedBufferOffset = 0;
      }
    }

    // The loop step uses __offsets[0] rather than position because
    // __streamMatchPrefix() may rewrite __offsets[0] while it runs.
    for(position = __lastMatchedBufferOffset;
        position < __streamSearchBuffer._bufferSize + __beginOffset;
        position = __offsets[0] + 1) {

      __offsets[0] = position;
      // Only attempt a prefix match when the fast map allows the character
      // at this position; a result above -1 is a match.
      if(__awkPattern._fastMap[__streamSearchBuffer._buffer[position]] &&
         (tokensMatched = __streamMatchPrefix()) > -1) {

        __lastMatchResult = new AwkMatchResult(
          new String(__streamSearchBuffer._buffer, __offsets[0],
                     tokensMatched), __offsets[0]);

        // Resume after the last matched character, or one past the start
        // for an empty match so the search still advances.
        __lastMatchedBufferOffset =
          (tokensMatched > 0 ? __offsets[1] + 1 : __offsets[0] + 1);

        return;
      } else if(__awkPattern._matchesNullString) {
        // The pattern accepts the empty string: report an empty match here.
        __lastMatchResult = new AwkMatchResult(new String(), position);

        __lastMatchedBufferOffset = position + 1;

        return;
      }
    }

    // Buffer scanned without a match; the next pass of the outer loop
    // either reads more input or stops.
    __lastMatchedBufferOffset = position;
  }
}
742
743
744 /**
745 * Fetches the last match found by a call to a matches() or contains()
746 * method.
747 * <p>
748 * @return A MatchResult instance containing the pattern match found
749 * by the last call to any one of the matches() or contains()
750 * methods. If no match was found by the last call, returns
751 * null.
752 */
753 public MatchResult getMatch() { return __lastMatchResult; }
754
755 }
756
757
758
759