Source code: org/apache/xmlbeans/impl/common/XMLChar.java
1 /* Copyright 2004 The Apache Software Foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 package org.apache.xmlbeans.impl.common;
17
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of static convenience methods is supplied to query those
 * properties. The lookup table of character properties is private to
 * this class, so all checks go through the convenience methods; the
 * public <code>MASK_*</code> constants name the individual property
 * bits that are stored for each character.
 * <p>
 * The table only covers the range 0x0000-0xFFFF. Methods that also
 * accept supplemental characters (0x10000-0x10FFFF) say so in their
 * documentation. Values outside of 0x0000-0x10FFFF (for example the
 * <code>-1</code> end-of-stream marker) never have any property.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java 111285 2004-12-08 16:54:26Z cezar $
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, indexed by UTF-16 code unit (0x0000-0xFFFF). */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The newline characters '\n' and '\r' and the character ']' are
     * considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        int charRange[] = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int spaceChar[] = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int nameChar[] = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int nameStartChar[] = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int pubidChar[] = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int pubidRange[] = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int letterRange[] = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int letterChar[] = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int combiningCharRange[] = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int combiningCharChar[] = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int digitRange[] = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int extenderRange[] = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int extenderChar[] = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int specialChar[] = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        setFlagsInRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters (must run after the content bit was set)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        setFlags(spaceChar, MASK_SPACE);

        // set name start characters
        setFlags(nameStartChar, nameStartFlags);
        setFlagsInRanges(letterRange, nameStartFlags);
        setFlags(letterChar, nameStartFlags);

        // set name characters
        setFlags(nameChar, nameFlags);
        setFlagsInRanges(digitRange, nameFlags);
        setFlagsInRanges(combiningCharRange, nameFlags);
        setFlags(combiningCharChar, nameFlags);
        setFlagsInRanges(extenderRange, nameFlags);
        setFlags(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        // (must run after the name flags above, which include ':')
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        setFlags(pubidChar, MASK_PUBID);
        setFlagsInRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static methods
    //

    /** ORs <code>mask</code> into the table entry of every listed character. */
    private static void setFlags(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /**
     * ORs <code>mask</code> into the table entry of every character in the
     * given inclusive ranges, stored as consecutive (first, last) pairs.
     */
    private static void setFlagsInRanges(int[] bounds, int mask) {
        for (int i = 0; i < bounds.length; i += 2) {
            for (int c = bounds[i]; c <= bounds[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /**
     * Returns true if <code>c</code> lies within the table (0x0000-0xFFFF)
     * and has at least one of the bits of <code>mask</code> set. Values
     * outside of the table, including negative ones, yield false instead
     * of an out-of-bounds access.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character,
     * i.e. lies in the range 0x10000 to 0x10FFFF.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to actually be
     * surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     * (0xD800-0xDBFF).
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     * (0xDC00-0xDFFF).
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF. Any other value outside of the table, including
     * negative values, is not valid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid, i.e. exactly
     * when {@link #isValid(int)} returns false.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000-0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification: the XML 1.0 space
     * characters plus 0x85 and 0x2028.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_SPACE) || c == 0x85 || c == 0x2028;
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false otherwise, including
     *         when name is null or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        final int length = name.length();
        for (int i = 1; i < length; i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false otherwise,
     *         including when ncName is null or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        final int length = ncName.length();
        for (int i = 1; i < length; i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false otherwise,
     *         including when nmtoken is null or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        final int length = nmtoken.length();
        for (int i = 0; i < length; i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean


    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return true if the name is well-formed; false otherwise,
     *         including when it is null or empty
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        final int length = ianaEncoding.length();
        for (int i = 1; i < length; i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * Java encoding name: one or more ASCII letters, digits, '.',
     * '_' or '-'. Every character is checked, including the first.
     *
     * @param javaEncoding The Java encoding name.
     * @return true if the name is well-formed; false otherwise,
     *         including when it is null or empty
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        final int length = javaEncoding.length();
        // Start at index 0: unlike IANA names there is no separate rule
        // for the first character, so it must be validated here as well.
        for (int i = 0; i < length; i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Returns true if <code>c</code> is an ASCII letter, an ASCII digit,
     * '.', '_' or '-', the characters accepted inside encoding names.
     */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
               (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

} // class XMLChar