Source code: org/bdgp/apps/dagedit/dataadapter/GOFlatFileTokenizer.java
1 package org.bdgp.apps.dagedit.dataadapter;
2
3 import java.util.*;
4 import java.net.URL;
5
6 public class GOFlatFileTokenizer {
7
8 Hashtable skipchars;
9 Hashtable keepertokens;
10 Hashtable boundaryChars;
11 char escapeChar = '\\';
12 String parseme;
13 int currentloc;
14 int lineNum;
15 String currentToken;
16 String filename;
17
18 public GOFlatFileTokenizer(URL filename, String parseme, int lineNum) {
19 this(filename.toString(), parseme, lineNum);
20 }
21
22 public GOFlatFileTokenizer(String filename, String parseme, int lineNum) {
23 skipchars = new Hashtable();
24 keepertokens = new Hashtable();
25 boundaryChars = new Hashtable();
26 setString(filename, parseme, lineNum);
27 }
28
29 public void setString(String filename, String parseme, int lineNum) {
30 this.filename = filename;
31 this.parseme = parseme;
32 this.lineNum = lineNum;
33 currentloc = 0;
34 }
35
36 protected boolean skipGarbage() {
37 int loc = currentloc;
38 for( ; loc < parseme.length(); loc++) {
39 if (!skipchars.containsKey(new Character(parseme.charAt(loc)))) {
40 break;
41 }
42 }
43 boolean skippedSomething = (currentloc != loc);
44 currentloc = loc;
45 return skippedSomething;
46 }
47
48 public GOToken getNextToken() throws GOFlatFileParseException {
49 boolean flush = !skipGarbage();
50 int loc = currentloc;
51 int start = currentloc;
52
53 Character boundaryChar = null;
54
55 if (loc >= parseme.length())
56 return null;
57 for(; loc < parseme.length(); loc++) {
58 Character c = new Character(parseme.charAt(loc));
59 if (c.charValue() == escapeChar) {
60 loc = loc + 1;
61 if (loc >= parseme.length())
62 throw new GOFlatFileParseException(
63 "Illegal escape character at end of line",
64 filename,
65 parseme,
66 lineNum,
67 parseme.length() - 1);
68 continue;
69 }
70
71 if (skipchars.containsKey(c)) {
72 currentloc = loc;
73 break;
74 }
75
76 if (keepertokens.containsKey(c)) {
77 if (start == loc) {
78 currentloc = loc+1;
79 return new GOToken(c+"",
80 filename,
81 parseme,
82 lineNum,
83 loc+1,
84 flush);
85 } else {
86 currentloc = loc;
87 break;
88 }
89 }
90
91 if (boundaryChar != null && boundaryChar.equals(c)) {
92 loc = loc + 1;
93 currentloc = loc;
94 break;
95 }
96
97 if (boundaryChars.contains(c)) {
98 boundaryChar = c;
99 }
100 }
101 if (loc >= parseme.length()) {
102 currentloc = loc+1;
103 }
104
105 GOToken out = new GOToken(unescape(parseme.substring(start, loc)),
106 filename,
107 parseme,
108 lineNum,
109 start+1,
110 flush);
111 return out;
112 }
113
114 public String unescape(String in) {
115 StringBuffer out = new StringBuffer();
116 for(int i=0;i < in.length(); i++) {
117 char c = in.charAt(i);
118 if (c == escapeChar)
119 c = in.charAt(++i);
120 out.append(c);
121 }
122 return out.toString();
123 }
124
125 public void addTokenChar(char ch) {
126 Character c = new Character(ch);
127 skipchars.put(c, c);
128 }
129
130 public void addBoundaryChar(char ch) {
131 Character c = new Character(ch);
132 boundaryChars.put(c,c);
133 }
134
135 public void addKeeperTokenChar(char ch) {
136 Character c = new Character(ch);
137 keepertokens.put(c,c);
138 }
139 }