1 /* Copyright 2004 The Apache Software Foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 package org.apache.xmlbeans.impl.regex;
17
18 import java.util.Hashtable;
19 import java.util.Locale;
20
21 /**
22 * A regular expression parser for the XML Schema.
23 *
24 * @author TAMURA Kent <kent@trl.ibm.co.jp>
25 */
26 class ParserForXMLSchema extends RegexParser {
27
/**
 * Creates a parser with no locale configured: the call that would apply the
 * default locale is commented out.
 */
public ParserForXMLSchema() {
    super();
    // Disabled: this.setLocale(Locale.getDefault());
}
/**
 * Creates a parser; the supplied locale is currently not applied because the
 * call that would consume it is commented out.
 *
 * @param locale unused by this constructor.
 */
public ParserForXMLSchema(Locale locale) {
    super();
    // Disabled: this.setLocale(locale);
}
34
35 Token processCaret() throws ParseException {
36 this.next();
37 return Token.createChar('^');
38 }
39 Token processDollar() throws ParseException {
40 this.next();
41 return Token.createChar('$');
42 }
43 Token processLookahead() throws ParseException {
44 throw ex("parser.process.1", this.offset);
45 }
46 Token processNegativelookahead() throws ParseException {
47 throw ex("parser.process.1", this.offset);
48 }
49 Token processLookbehind() throws ParseException {
50 throw ex("parser.process.1", this.offset);
51 }
52 Token processNegativelookbehind() throws ParseException {
53 throw ex("parser.process.1", this.offset);
54 }
55 Token processBacksolidus_A() throws ParseException {
56 throw ex("parser.process.1", this.offset);
57 }
58 Token processBacksolidus_Z() throws ParseException {
59 throw ex("parser.process.1", this.offset);
60 }
61 Token processBacksolidus_z() throws ParseException {
62 throw ex("parser.process.1", this.offset);
63 }
64 Token processBacksolidus_b() throws ParseException {
65 throw ex("parser.process.1", this.offset);
66 }
67 Token processBacksolidus_B() throws ParseException {
68 throw ex("parser.process.1", this.offset);
69 }
70 Token processBacksolidus_lt() throws ParseException {
71 throw ex("parser.process.1", this.offset);
72 }
73 Token processBacksolidus_gt() throws ParseException {
74 throw ex("parser.process.1", this.offset);
75 }
76 Token processStar(Token tok) throws ParseException {
77 this.next();
78 return Token.createClosure(tok);
79 }
80 Token processPlus(Token tok) throws ParseException {
81 // X+ -> XX*
82 this.next();
83 return Token.createConcat(tok, Token.createClosure(tok));
84 }
85 Token processQuestion(Token tok) throws ParseException {
86 // X? -> X|
87 this.next();
88 Token par = Token.createUnion();
89 par.addChild(tok);
90 par.addChild(Token.createEmpty());
91 return par;
92 }
/**
 * Always answers {@code false}; the {@code off} argument is never consulted.
 * NOTE(review): presumably the superclass uses this to look for a '?' following a
 * quantifier -- confirm against RegexParser.
 *
 * @param off ignored.
 * @return {@code false}, unconditionally.
 */
boolean checkQuestion(int off) {
    return false;
}
96 Token processParen() throws ParseException {
97 this.next();
98 Token tok = Token.createParen(this.parseRegex(), 0);
99 if (this.read() != super.T_RPAREN) throw ex("parser.factor.1", this.offset-1);
100 this.next(); // Skips ')'
101 return tok;
102 }
103 Token processParen2() throws ParseException {
104 throw ex("parser.process.1", this.offset);
105 }
106 Token processCondition() throws ParseException {
107 throw ex("parser.process.1", this.offset);
108 }
109 Token processModifiers() throws ParseException {
110 throw ex("parser.process.1", this.offset);
111 }
112 Token processIndependent() throws ParseException {
113 throw ex("parser.process.1", this.offset);
114 }
115 Token processBacksolidus_c() throws ParseException {
116 this.next();
117 return this.getTokenForShorthand('c');
118 }
119 Token processBacksolidus_C() throws ParseException {
120 this.next();
121 return this.getTokenForShorthand('C');
122 }
123 Token processBacksolidus_i() throws ParseException {
124 this.next();
125 return this.getTokenForShorthand('i');
126 }
127 Token processBacksolidus_I() throws ParseException {
128 this.next();
129 return this.getTokenForShorthand('I');
130 }
131 Token processBacksolidus_g() throws ParseException {
132 throw this.ex("parser.process.1", this.offset-2);
133 }
134 Token processBacksolidus_X() throws ParseException {
135 throw ex("parser.process.1", this.offset-2);
136 }
137 Token processBackreference() throws ParseException {
138 throw ex("parser.process.1", this.offset-4);
139 }
140
141 int processCIinCharacterClass(RangeToken tok, int c) {
142 tok.mergeRanges(this.getTokenForShorthand(c));
143 return -1;
144 }
145
146
/**
 * Parses a character-class-expression, not a character-class-escape.
 *
 * c-c-expression ::= '[' c-group ']'
 * c-group ::= positive-c-group | negative-c-group | c-c-subtraction
 * positive-c-group ::= (c-range | c-c-escape)+
 * negative-c-group ::= '^' positive-c-group
 * c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction
 * subtraction ::= '-' c-c-expression
 * c-range ::= single-range | from-to-range
 * single-range ::= multi-c-escape | category-c-escape | block-c-escape | &lt;any XML char&gt;
 * cc-normal-c ::= &lt;any character except [, ], \&gt;
 * from-to-range ::= cc-normal-c '-' cc-normal-c
 *
 * The scanner context is switched to S_INBRACKETS on entry and back to S_NORMAL
 * before the closing ']' is consumed.
 *
 * @param useNrange Ignored; the body never reads it.
 * @return A range created via Token.createRange(). A negated class is expressed by
 *         subtracting the parsed ranges from 0..Token.UTF16_MAX rather than by a
 *         separate negated-range token. The result is sorted and compacted.
 * @throws ParseException on end of input inside the class ("parser.cc.2"), on an
 *         unescaped '[', ']' or '-' in a disallowed position ("parser.cc.6",
 *         "parser.cc.7", "parser.cc.8"), on a missing ']' after a subtraction
 *         ("parser.cc.5"), on an unresolved \p/\P ("parser.atom.5"), or on a
 *         descending range ("parser.ope.3").
 */
protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
    this.setContext(S_INBRACKETS);
    this.next(); // '['
    boolean nrange = false;
    // Only used for a negated class: starts as the full range and has `tok` subtracted from it.
    RangeToken base = null;
    RangeToken tok;
    if (this.read() == T_CHAR && this.chardata == '^') {
        nrange = true;
        this.next(); // '^'
        base = Token.createRange();
        base.addRange(0, Token.UTF16_MAX);
        tok = Token.createRange();
    } else {
        tok = Token.createRange();
    }
    int type;
    // True only for the first item after '[' (or after '[^'); a ']' or subtraction there is
    // not treated as a terminator.
    boolean firstloop = true;
    while ((type = this.read()) != T_EOF) { // Don't use 'continue' for this loop.
        // single-range | from-to-range | subtraction
        if (type == T_CHAR && this.chardata == ']' && !firstloop) {
            // End of the class: apply the negation, if any, and stop.
            if (nrange) {
                base.subtractRanges(tok);
                tok = base;
            }
            break;
        }
        int c = this.chardata;
        // Set when the current item was a shorthand/category escape that has already been merged.
        boolean end = false;
        if (type == T_BACKSOLIDUS) {
            switch (c) {
            case 'd': case 'D':
            case 'w': case 'W':
            case 's': case 'S':
                tok.mergeRanges(this.getTokenForShorthand(c));
                end = true;
                break;

            case 'i': case 'I':
            case 'c': case 'C':
                // A negative return value means the helper merged the range itself.
                c = this.processCIinCharacterClass(tok, c);
                if (c < 0) end = true;
                break;

            case 'p':
            case 'P':
                // Remember where the escape started so the error points at it.
                int pstart = this.offset;
                RangeToken tok2 = this.processBacksolidus_pP(c);
                if (tok2 == null) throw this.ex("parser.atom.5", pstart);
                tok.mergeRanges(tok2);
                end = true;
                break;

            default:
                // Any other escape must decode to a single character.
                c = this.decodeEscaped();
            } // \ + c
        } // backsolidus
        else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
            // Subtraction
            if (nrange) {
                base.subtractRanges(tok);
                tok = base;
            }
            // The nested call parses the bracketed subtrahend, including its own ']'.
            RangeToken range2 = this.parseCharacterClass(false);
            tok.subtractRanges(range2);
            // The outer class must close immediately after the subtrahend.
            if (this.read() != T_CHAR || this.chardata != ']')
                throw this.ex("parser.cc.5", this.offset);
            break; // Exit this loop
        }
        this.next();
        if (!end) { // if not shorthands...
            if (type == T_CHAR) {
                if (c == '[') throw this.ex("parser.cc.6", this.offset-2);
                if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
                // (radup) XMLSchema 1.0 allows the '-' as the first character of a range,
                // but it looks like XMLSchema 1.1 will prohibit it - track this
                if (c == '-' && !firstloop) throw this.ex("parser.cc.8", this.offset-2);
            }
            if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
                tok.addRange(c, c);
            } else { // Found '-'
                // Decide whether this '-' introduces the upper bound of a from-to range.
                this.next(); // Skips '-'
                if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
                // c '-' ']' and c '-' followed by a subtraction are rejected:
                // the '-' is not accepted as a trailing single character here.
                if ((type == T_CHAR && this.chardata == ']')
                    || type == T_XMLSCHEMA_CC_SUBTRACTION) {
                    throw this.ex("parser.cc.8", this.offset-1);
                } else {
                    int rangeend = this.chardata;
                    if (type == T_CHAR) {
                        if (rangeend == '[') throw this.ex("parser.cc.6", this.offset-1);
                        if (rangeend == ']') throw this.ex("parser.cc.7", this.offset-1);
                        if (rangeend == '-') throw this.ex("parser.cc.8", this.offset-2);
                    }
                    else if (type == T_BACKSOLIDUS)
                        rangeend = this.decodeEscaped();
                    this.next();

                    // The lower bound must not exceed the upper bound.
                    if (c > rangeend) throw this.ex("parser.ope.3", this.offset-1);
                    tok.addRange(c, rangeend);
                }
            }
        }
        firstloop = false;
    }
    // Reaching end of input here means the class was never closed.
    if (this.read() == T_EOF)
        throw this.ex("parser.cc.2", this.offset);
    tok.sortRanges();
    tok.compactRanges();
    //tok.dumpRanges();
    this.setContext(S_NORMAL);
    this.next(); // Skips ']'

    return tok;
}
279
280 protected RangeToken parseSetOperations() throws ParseException {
281 throw this.ex("parser.process.1", this.offset);
282 }
283
284 Token getTokenForShorthand(int ch) {
285 switch (ch) {
286 case 'd':
287 return ParserForXMLSchema.getRange("xml:isDigit", true);
288 case 'D':
289 return ParserForXMLSchema.getRange("xml:isDigit", false);
290 case 'w':
291 return ParserForXMLSchema.getRange("xml:isWord", true);
292 case 'W':
293 return ParserForXMLSchema.getRange("xml:isWord", false);
294 case 's':
295 return ParserForXMLSchema.getRange("xml:isSpace", true);
296 case 'S':
297 return ParserForXMLSchema.getRange("xml:isSpace", false);
298 case 'c':
299 return ParserForXMLSchema.getRange("xml:isNameChar", true);
300 case 'C':
301 return ParserForXMLSchema.getRange("xml:isNameChar", false);
302 case 'i':
303 return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
304 case 'I':
305 return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
306 default:
307 throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
308 }
309 }
310 int decodeEscaped() throws ParseException {
311 if (this.read() != T_BACKSOLIDUS) throw ex("parser.next.1", this.offset-1);
312 int c = this.chardata;
313 switch (c) {
314 case 'n': c = '\n'; break; // LINE FEED U+000A
315 case 'r': c = '\r'; break; // CRRIAGE RETURN U+000D
316 case 't': c = '\t'; break; // HORIZONTAL TABULATION U+0009
317 case '\\':
318 case '|':
319 case '.':
320 case '^':
321 case '-':
322 case '?':
323 case '*':
324 case '+':
325 case '{':
326 case '}':
327 case '(':
328 case ')':
329 case '[':
330 case ']':
331 break; // return actucal char
332 default:
333 throw ex("parser.process.1", this.offset-2);
334 }
335 return c;
336 }
337
// Lookup tables keyed by range name (for example "xml:isDigit"), filled on the
// first call to getRange(): `ranges` holds each named range, `ranges2` holds the
// result of Token.complementRanges() for the same name.
static private Hashtable ranges = null;
static private Hashtable ranges2 = null;
340 static synchronized protected RangeToken getRange(String name, boolean positive) {
341 if (ranges == null) {
342 ranges = new Hashtable();
343 ranges2 = new Hashtable();
344
345 Token tok = Token.createRange();
346 setupRange(tok, SPACES);
347 ranges.put("xml:isSpace", tok);
348 ranges2.put("xml:isSpace", Token.complementRanges(tok));
349
350 tok = Token.createRange();
351 setupRange(tok, DIGITS);
352 ranges.put("xml:isDigit", tok);
353 ranges2.put("xml:isDigit", Token.complementRanges(tok));
354
355 tok = Token.createRange();
356 setupRange(tok, DIGITS);
357 ranges.put("xml:isDigit", tok);
358 ranges2.put("xml:isDigit", Token.complementRanges(tok));
359
360 tok = Token.createRange();
361 setupRange(tok, LETTERS);
362 tok.mergeRanges((Token)ranges.get("xml:isDigit"));
363 ranges.put("xml:isWord", tok);
364 ranges2.put("xml:isWord", Token.complementRanges(tok));
365
366 tok = Token.createRange();
367 setupRange(tok, NAMECHARS);
368 ranges.put("xml:isNameChar", tok);
369 ranges2.put("xml:isNameChar", Token.complementRanges(tok));
370
371 tok = Token.createRange();
372 setupRange(tok, LETTERS);
373 tok.addRange('_', '_');
374 tok.addRange(':', ':');
375 ranges.put("xml:isInitialNameChar", tok);
376 ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
377 }
378 RangeToken tok = positive ? (RangeToken)ranges.get(name)
379 : (RangeToken)ranges2.get(name);
380 return tok;
381 }
382
383 static void setupRange(Token range, String src) {
384 int len = src.length();
385 for (int i = 0; i < len; i += 2)
386 range.addRange(src.charAt(i), src.charAt(i+1));
387 }
388
389 private static final String SPACES = "\t\n\r\r ";
// Flattened (first, last) character pairs, read two characters at a time by
// setupRange() to build the "xml:isNameChar" range in getRange().
private static final String NAMECHARS =
    "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
    +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
    +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
    +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
    +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
    +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
    +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
    +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
    +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
    +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
    +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
    +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
    +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
    +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
    +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
    +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
    +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
    +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
    +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
    +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
    +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
    +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
    +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
    +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
    +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
    +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
    +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
    +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
    +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
    +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
    +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
    +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
    +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
    +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
    +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
    +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
    +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
    +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
    +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
    +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
    +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
    +"";
// Flattened (first, last) character pairs, read two characters at a time by
// setupRange(). getRange() uses this table for "xml:isWord" (merged with the digit
// range) and for "xml:isInitialNameChar" (with '_' and ':' added).
private static final String LETTERS =
    "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
    +"\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
    +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
    +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
    +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
    +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
    +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
    +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
    +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
    +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
    +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
    +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
    +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
    +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
    +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
    +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
    +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
    +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
    +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
    +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
    +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
    +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
    +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
    +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
    +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
    +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
    +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
    +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
    +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
    +"\uac00\ud7a3";
// Flattened (first, last) character pairs, read two characters at a time by
// setupRange() to build the "xml:isDigit" range in getRange().
private static final String DIGITS =
    "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
    +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
    +"\u0F20\u0F29";
468 }