1 /*
2 * $Id: prefixExample.java,v 1.7 2003/11/07 20:16:23 dfs Exp $
3 *
4 * ====================================================================
5 * The Apache Software License, Version 1.1
6 *
7 * Copyright (c) 2000 The Apache Software Foundation. All rights
8 * reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 *
22 * 3. The end-user documentation included with the redistribution,
23 * if any, must include the following acknowledgment:
24 * "This product includes software developed by the
25 * Apache Software Foundation (http://www.apache.org/)."
26 * Alternately, this acknowledgment may appear in the software itself,
27 * if and wherever such third-party acknowledgments normally appear.
28 *
29 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
30 * must not be used to endorse or promote products derived from this
31 * software without prior written permission. For written
32 * permission, please contact apache@apache.org.
33 *
34 * 5. Products derived from this software may not be called "Apache"
35 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
36 * name, without prior written permission of the Apache Software Foundation.
37 *
38 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49 * SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This software consists of voluntary contributions made by many
53 * individuals on behalf of the Apache Software Foundation. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58
59 package examples.awk;
60
61 import org.apache.oro.text.regex;
62 import org.apache.oro.text.awk;
63
64 /**
65 * This is a test program demonstrating an application of the matchesPrefix()
66 * methods. This example program shows how you might tokenize a stream of
67 * input using whitespace as a token separator. Don't forget to use quotes
68 * around the input on the command line, e.g.
69 * java prefixExample "Test to see if 1.0 is real and 2 is an integer"
70 *
71 * If you don't need the power of a full blown lexer generator, you can
72 * easily use regular expressions to create your own tokenization and
73 * simple parsing classes using similar approaches. This example is
74 * rather sloppy. If you look at the equivalent example in the OROMatcher
75 * distribution, you'll see how Perl's zero-width look ahead assertion
76 * makes correctness easier to achieve.
77 *
78 * @version @version@
79 */
80 public final class prefixExample {
81 public static final int REAL = 0;
82 public static final int INTEGER = 1;
83 public static final int STRING = 2;
84
85 public static final String[] types = { "Real", "Integer", "String" };
86 public static final String whitespace = "[ \t\n\r]+";
87 public static final String[] tokens = {
88 "-?[0-9]*\\.[0-9]+([eE]-?[0-9]+)?", "-?[0-9]+", "[^ \t\n\r]+"
89 };
90
91 public static final void main(String args[]) {
92 int token;
93 PatternMatcherInput input;
94 PatternMatcher matcher;
95 PatternCompiler compiler;
96 Pattern[] patterns;
97 Pattern tokenSeparator = null;
98 MatchResult result;
99
100 if(args.length < 1) {
101 System.err.println("Usage: prefixExample <sample input>");
102 System.exit(1);
103 }
104
105 input = new PatternMatcherInput(args[0]);
106 compiler = new AwkCompiler();
107 patterns = new Pattern[tokens.length];
108
109 try {
110 tokenSeparator = compiler.compile(whitespace);
111 for(token=0; token < tokens.length; token++)
112 patterns[token] = compiler.compile(tokens[token]);
113 } catch(MalformedPatternException e) {
114 System.err.println("Bad pattern.");
115 e.printStackTrace();
116 System.exit(1);
117 }
118
119 matcher = new AwkMatcher();
120
121 _whileLoop:
122 while(!input.endOfInput()) {
123 for(token = 0; token < tokens.length; token++)
124 if(matcher.matchesPrefix(input, patterns[token])) {
125 int offset;
126 result = matcher.getMatch();
127 offset = input.getCurrentOffset();
128 input.setCurrentOffset(result.endOffset(0));
129
130 if(matcher.matchesPrefix(input, tokenSeparator)) {
131 input.setCurrentOffset(matcher.getMatch().endOffset(0));
132 System.out.println(types[token] + ": " + result);
133 continue _whileLoop;
134 } else if(input.endOfInput()) {
135 System.out.println(types[token] + ": " + result);
136 break _whileLoop;
137 }
138
139 input.setCurrentOffset(offset);
140 }
141
142 if(matcher.matchesPrefix(input, tokenSeparator))
143 input.setCurrentOffset(matcher.getMatch().endOffset(0));
144 else {
145 System.err.println("Unrecognized token starting at offset: " +
146 input.getCurrentOffset());
147 break;
148 }
149 }
150
151 }
152 }