Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/jdom/Verifier.java


1   /*-- 
2   
3    $Id: Verifier.java,v 1.51 2004/08/31 21:58:55 jhunter Exp $
4   
5    Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
6    All rights reserved.
7    
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions
10   are met:
11   
12   1. Redistributions of source code must retain the above copyright
13      notice, this list of conditions, and the following disclaimer.
14   
15   2. Redistributions in binary form must reproduce the above copyright
16      notice, this list of conditions, and the disclaimer that follows 
17      these conditions in the documentation and/or other materials 
18      provided with the distribution.
19  
20   3. The name "JDOM" must not be used to endorse or promote products
21      derived from this software without prior written permission.  For
22      written permission, please contact <request_AT_jdom_DOT_org>.
23   
24   4. Products derived from this software may not be called "JDOM", nor
25      may "JDOM" appear in their name, without prior written permission
26      from the JDOM Project Management <request_AT_jdom_DOT_org>.
27   
28   In addition, we request (but do not require) that you include in the 
29   end-user documentation provided with the redistribution and/or in the 
30   software itself an acknowledgement equivalent to the following:
31       "This product includes software developed by the
32        JDOM Project (http://www.jdom.org/)."
33   Alternatively, the acknowledgment may be graphical using the logos 
34   available at http://www.jdom.org/images/logos.
35  
36   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   SUCH DAMAGE.
48  
49   This software consists of voluntary contributions made by many 
50   individuals on behalf of the JDOM Project and was originally 
51   created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52   Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
53   on the JDOM Project, please see <http://www.jdom.org/>.
54   
55   */
56  
57  package org.jdom;
58  
59  import java.util.*;
60  
61  /**
62   * A utility class to handle well-formedness checks on names, data, and other
63   * verification tasks for JDOM. The class is final and may not be subclassed.
64   *
65   * @version $Revision: 1.51 $, $Date: 2004/08/31 21:58:55 $
66   * @author  Brett McLaughlin
67   * @author  Elliotte Rusty Harold
68   * @author  Jason Hunter
69   * @author  Bradley S. Huffman
70   */
71  final public class Verifier {
72  
73      private static final String CVS_ID = 
74        "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.51 $ $Date: 2004/08/31 21:58:55 $ $Name: jdom_1_0 $";
75  
76      /**
77       * Ensure instantation cannot occur.
78       */
79      private Verifier() { }
80  
81      /**
82       * This will check the supplied name to see if it is legal for use as
83       * a JDOM <code>{@link Element}</code> name.
84       *
85       * @param name <code>String</code> name to check.
86       * @return <code>String</code> reason name is illegal, or
87       *         <code>null</code> if name is OK.
88       */
89      public static String checkElementName(String name) {
90          // Check basic XML name rules first
91          String reason;
92          if ((reason = checkXMLName(name)) != null) {
93              return reason;
94          }
95  
96          // No colons allowed, since elements handle this internally
97          if (name.indexOf(":") != -1) {
98              return "Element names cannot contain colons";
99          }
100 
101         // If we got here, everything is OK
102         return null;
103     }
104 
105     /**
106      * This will check the supplied name to see if it is legal for use as
107      * a JDOM <code>{@link Attribute}</code> name.
108      *
109      * @param name <code>String</code> name to check.
110      * @return <code>String</code> reason name is illegal, or
111      *         <code>null</code> if name is OK.
112      */
113     public static String checkAttributeName(String name) {
114         // Check basic XML name rules first
115         String reason;
116         if ((reason = checkXMLName(name)) != null) {
117             return reason;
118         }
119 
120         // No colons are allowed, since attributes handle this internally
121         if (name.indexOf(":") != -1) {
122             return "Attribute names cannot contain colons";
123         }
124 
125         // Attribute names may not be xmlns since we do this internally too
126         if (name.equals("xmlns")) {
127             return "An Attribute name may not be \"xmlns\"; " +
128                    "use the Namespace class to manage namespaces";
129         }
130 
131         // If we got here, everything is OK
132         return null;
133     }
134     
135     /**
136      * This will check the supplied string to see if it only contains
137      * characters allowed by the XML 1.0 specification. The C0 controls
138      * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded
139      * except for carriage return, linefeed, and the horizontal tab.
140      * Surrogates are also excluded. 
141      * <p>
142      * This method is useful for checking element content and attribute
143      * values. Note that characters 
144      * like " and &lt; are allowed in attribute values and element content. 
145      * They will simply be escaped as &quot; or &lt; 
146      * when the value is serialized. 
147      * </p>
148      *
149      * @param text <code>String</code> value to check.
150      * @return <code>String</code> reason name is illegal, or
151      *         <code>null</code> if name is OK.
152      */
153     public static String checkCharacterData(String text) {
154         if (text == null) {
155             return "A null is not a legal XML value";
156         }
157 
158         // Do check
159         for (int i = 0, len = text.length(); i<len; i++) {
160 
161             int ch = text.charAt(i);
162             
163             // Check if high part of a surrogate pair
164             if (ch >= 0xD800 && ch <= 0xDBFF) {
165                 // Check if next char is the low-surrogate
166                 i++;
167                 if (i < len) {
168                     char low = text.charAt(i);
169                     if (low < 0xDC00 || low > 0xDFFF) {
170                         return "Illegal Surrogate Pair";
171                     }
172                     // It's a good pair, calculate the true value of
173                     // the character to then fall thru to isXMLCharacter
174                     ch = 0x10000 + (ch - 0xD800) * 0x400 + (low - 0xDC00);
175                 }
176                 else {
177                     return "Surrogate Pair Truncated";
178                 }
179             }
180 
181             if (!isXMLCharacter(ch)) {
182                 // Likely this character can't be easily displayed
183                 // because it's a control so we use it'd hexadecimal 
184                 // representation in the reason.
185                 return ("0x" + Integer.toHexString(ch) +
186                         " is not a legal XML character");
187             }
188         }
189 
190         // If we got here, everything is OK
191         return null;
192     }
193 
194     /**
195      * This will check the supplied data to see if it is legal for use as
196      * JDOM <code>{@link CDATA}</code>.
197      *
198      * @param data <code>String</code> data to check.
199      * @return <code>String</code> reason data is illegal, or
200      *         <code>null</code> is name is OK.
201      */
202     public static String checkCDATASection(String data) {
203         String reason = null;
204         if ((reason = checkCharacterData(data)) != null) {
205             return reason;
206         }
207 
208         if (data.indexOf("]]>") != -1) {
209             return "CDATA cannot internally contain a CDATA ending " +
210                    "delimiter (]]>)";
211         }
212 
213         // If we got here, everything is OK
214         return null;
215     }
216 
217     /**
218      * This will check the supplied name to see if it is legal for use as
219      * a JDOM <code>{@link Namespace}</code> prefix.
220      *
221      * @param prefix <code>String</code> prefix to check.
222      * @return <code>String</code> reason name is illegal, or
223      *         <code>null</code> if name is OK.
224      */
225     public static String checkNamespacePrefix(String prefix) {
226         // Manually do rules, since URIs can be null or empty
227         if ((prefix == null) || (prefix.equals(""))) {
228             return null;
229         }
230 
231         // Cannot start with a number
232         char first = prefix.charAt(0);
233         if (isXMLDigit(first)) {
234             return "Namespace prefixes cannot begin with a number";
235         }
236         // Cannot start with a $
237         if (first == '$') {
238             return "Namespace prefixes cannot begin with a dollar sign ($)";
239         }
240         // Cannot start with a -
241         if (first == '-') {
242             return "Namespace prefixes cannot begin with a hyphen (-)";
243         }
244         // Cannot start with a .
245         if (first == '.') {
246             return "Namespace prefixes cannot begin with a period (.)";
247         }
248         // Cannot start with "xml" in any character case
249         if (prefix.toLowerCase().startsWith("xml")) {
250             return "Namespace prefixes cannot begin with " +
251                    "\"xml\" in any combination of case";
252         }
253 
254         // Ensure legal content
255         for (int i=0, len = prefix.length(); i<len; i++) {
256             char c = prefix.charAt(i);
257             if (!isXMLNameCharacter(c)) {
258                 return "Namespace prefixes cannot contain the character \"" +
259                         c + "\"";
260             }
261         }
262 
263         // No colons allowed
264         if (prefix.indexOf(":") != -1) {
265             return "Namespace prefixes cannot contain colons";
266         }
267 
268         // If we got here, everything is OK
269         return null;
270     }
271 
272     /**
273      * This will check the supplied name to see if it is legal for use as
274      * a JDOM <code>{@link Namespace}</code> URI.
275      *
276      * @param uri <code>String</code> URI to check.
277      * @return <code>String</code> reason name is illegal, or
278      *         <code>null</code> if name is OK.
279      */
280     public static String checkNamespaceURI(String uri) {
281         // Manually do rules, since URIs can be null or empty
282         if ((uri == null) || (uri.equals(""))) {
283             return null;
284         }
285 
286         // Cannot start with a number
287         char first = uri.charAt(0);
288         if (Character.isDigit(first)) {
289             return "Namespace URIs cannot begin with a number";
290         }
291         // Cannot start with a $
292         if (first == '$') {
293             return "Namespace URIs cannot begin with a dollar sign ($)";
294         }
295         // Cannot start with a -
296         if (first == '-') {
297             return "Namespace URIs cannot begin with a hyphen (-)";
298         }
299 
300         // If we got here, everything is OK
301         return null;
302     }
303 
304     /**
305      * Check if two namespaces collide.
306      *
307      * @param namespace <code>Namespace</code> to check.
308      * @param other <code>Namespace</code> to check against.
309      * @return <code>String</code> reason for collision, or
310      *         <code>null</code> if no collision.
311      */
312     public static String checkNamespaceCollision(Namespace namespace,
313                                                  Namespace other) {
314         String p1,p2,u1,u2,reason;
315 
316         reason = null;
317         p1 = namespace.getPrefix();
318         u1 = namespace.getURI();
319         p2 = other.getPrefix();
320         u2 = other.getURI();
321         if (p1.equals(p2) && !u1.equals(u2)) {
322             reason = "The namespace prefix \"" + p1 + "\" collides";
323         }
324         return reason;
325     }
326 
327     /**
328      * Check if <code>{@link Attribute}</code>'s namespace collides with a 
329      * <code>{@link Element}</code>'s namespace.
330      *
331      * @param attribute <code>Attribute</code> to check.
332      * @param element <code>Element</code> to check against.
333      * @return <code>String</code> reason for collision, or
334      *         <code>null</code> if no collision.
335      */
336     public static String checkNamespaceCollision(Attribute attribute,
337                                                  Element element) {
338         Namespace namespace = attribute.getNamespace();
339         String prefix = namespace.getPrefix();
340         if ("".equals(prefix)) {
341             return null;
342         }
343 
344         return checkNamespaceCollision(namespace, element);
345     }
346 
347     /**
348      * Check if a <code>{@link Namespace}</code> collides with a
349      * <code>{@link Element}</code>'s namespace.
350      *
351      * @param namespace <code>Namespace</code> to check.
352      * @param element <code>Element</code> to check against.
353      * @return <code>String</code> reason for collision, or
354      *         <code>null</code> if no collision.
355      */
356     public static String checkNamespaceCollision(Namespace namespace,
357                                                  Element element) {
358         String reason = checkNamespaceCollision(namespace,
359                                                 element.getNamespace());
360         if (reason != null) {
361             return reason + " with the element namespace prefix";
362         }
363 
364         reason = checkNamespaceCollision(namespace,
365                                          element.getAdditionalNamespaces());
366         if (reason != null) {
367             return reason;
368         }
369 
370         reason = checkNamespaceCollision(namespace, element.getAttributes());
371         if (reason != null) {
372             return reason;
373         }
374 
375         return null;
376     }
377 
378     /**
379      * Check if a <code>{@link Namespace}</code> collides with a
380      * <code>{@link Attribute}</code>'s namespace.
381      *
382      * @param namespace <code>Namespace</code> to check.
383      * @param attribute <code>Attribute</code> to check against.
384      * @return <code>String</code> reason for collision, or
385      *         <code>null</code> if no collision.
386      */
387     public static String checkNamespaceCollision(Namespace namespace,
388                                                  Attribute attribute) {
389         String reason = checkNamespaceCollision(namespace,
390                                                 attribute.getNamespace());
391         if (reason != null) {
392             reason += " with an attribute namespace prefix on the element";
393         }
394         return reason;
395     }
396 
397     /**
398      * Check if a <code>{@link Namespace}</code> collides with any namespace
399      * from a list of objects.
400      *
401      * @param namespace <code>Namespace</code> to check.
402      * @param list <code>List</code> to check against.
403      * @return <code>String</code> reason for collision, or
404      *         <code>null</code> if no collision.
405      */
406     public static String checkNamespaceCollision(Namespace namespace,
407                                                  List list) {
408         if (list == null) {
409             return null;
410         }
411 
412         String reason = null;
413         Iterator i = list.iterator();
414         while ((reason == null) && i.hasNext()) {
415             Object obj = i.next();
416             if (obj instanceof Attribute) {
417                 reason = checkNamespaceCollision(namespace, (Attribute) obj);
418             }
419             else if (obj instanceof Element) {
420                 reason = checkNamespaceCollision(namespace, (Element) obj);
421             }
422             else if (obj instanceof Namespace) {
423                 reason = checkNamespaceCollision(namespace, (Namespace) obj);
424                 if (reason != null) {
425                     reason += " with an additional namespace declared" +
426                               " by the element";
427                 }
428             }
429         }
430         return reason;
431     }
432 
433     /**
434      * This will check the supplied data to see if it is legal for use as
435      * a JDOM <code>{@link ProcessingInstruction}</code> target.
436      *
437      * @param target <code>String</code> target to check.
438      * @return <code>String</code> reason target is illegal, or
439      *         <code>null</code> if target is OK.
440      */
441     public static String checkProcessingInstructionTarget(String target) {
442         // Check basic XML name rules first
443         String reason;
444         if ((reason = checkXMLName(target)) != null) {
445             return reason;
446         }
447 
448         // No colons allowed, per Namespace Specification Section 6
449         if (target.indexOf(":") != -1) {
450             return "Processing instruction targets cannot contain colons";
451         }
452 
453         // Cannot begin with 'xml' in any case
454         if (target.equalsIgnoreCase("xml")) {
455             return "Processing instructions cannot have a target of " +
456                    "\"xml\" in any combination of case. (Note that the " +
457                    "\"<?xml ... ?>\" declaration at the beginning of a " +
458                    "document is not a processing instruction and should not " + 
459                    "be added as one; it is written automatically during " +
460                    "output, e.g. by XMLOutputter.)";
461         }
462 
463         // If we got here, everything is OK
464         return null;
465     }
466 
467    /**
468      * This will check the supplied data to see if it is legal for use as
469      * <code>{@link ProcessingInstruction}</code> data. Besides checking that
470      * all the characters are allowed in XML, this also checks
471      * that the data does not contain the PI end-string "?&gt;".
472      *
473      * @param data <code>String</code> data to check.
474      * @return <code>String</code> reason data is illegal, or
475      *         <code>null</code> if data is OK.
476      */
477     public static String checkProcessingInstructionData(String data) {
478         // Check basic XML name rules first
479         String reason = checkCharacterData(data);
480 
481         if (reason == null) {
482             if (data.indexOf("?>") >= 0) {
483                 return "Processing instructions cannot contain " +
484                        "the string \"?>\"";
485             }
486         }
487 
488         return reason;
489     }
490 
491     /**
492      * This will check the supplied data to see if it is legal for use as
493      * JDOM <code>{@link Comment}</code> data.
494      *
495      * @param data <code>String</code> data to check.
496      * @return <code>String</code> reason data is illegal, or
497      *         <code>null</code> if data is OK.
498      */
499     public static String checkCommentData(String data) {
500         String reason = null;
501         if ((reason = checkCharacterData(data)) != null) {
502             return reason;
503         }
504 
505         if (data.indexOf("--") != -1) {
506             return "Comments cannot contain double hyphens (--)";
507         }
508         if (data.startsWith("-")) {
509             return "Comment data cannot start with a hyphen.";
510         }
511         if (data.endsWith("-")) {
512             return "Comment data cannot end with a hyphen.";
513         }
514 
515         // If we got here, everything is OK
516         return null;
517     }
518 
519     // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
520     // [-'()+,./:=?;*#@$_%]
521     public static boolean isXMLPublicIDCharacter(char c) {
522 
523         if (c >= 'a' && c <= 'z') return true;
524         if (c >= '?' && c <= 'Z') return true;
525         if (c >= '\'' && c <= ';') return true;
526 
527         if (c == ' ') return true;
528         if (c == '!') return true;
529         if (c == '=') return true;
530         if (c == '#') return true;
531         if (c == '$') return true;
532         if (c == '_') return true;
533         if (c == '%') return true;
534         if (c == '\n') return true;
535         if (c == '\r') return true;
536         if (c == '\t') return true;
537 
538         return false;
539     }
540 
541     /**
542      * This will ensure that the data for a public identifier
543      * is legal.
544      *
545      * @param publicID <code>String</code> public ID to check.
546      * @return <code>String</code> reason public ID is illegal, or
547      *         <code>null</code> if public ID is OK.
548      */
549     public static String checkPublicID(String publicID) {
550         String reason = null;
551 
552         if (publicID == null) return null;
553         // This indicates there is no public ID
554 
555         for (int i = 0; i < publicID.length(); i++) {
556           char c = publicID.charAt(i);
557           if (!isXMLPublicIDCharacter(c)) {
558             reason = c + " is not a legal character in public IDs";
559             break;
560           }
561         }
562 
563         return reason;
564     }
565 
566 
567     /**
568      * This will ensure that the data for a system literal
569      * is legal.
570      *
571      * @param systemLiteral <code>String</code> system literal to check.
572      * @return <code>String</code> reason system literal is illegal, or
573      *         <code>null</code> if system literal is OK.
574      */
575     public static String checkSystemLiteral(String systemLiteral) {
576         String reason = null;
577 
578         if (systemLiteral == null) return null;
579         // This indicates there is no system ID
580 
581         if (systemLiteral.indexOf('\'') != -1
582           && systemLiteral.indexOf('"') != -1) {
583             reason =
584              "System literals cannot simultaneously contain both single and double quotes.";
585         }
586         else {
587           reason = checkCharacterData(systemLiteral);
588         }
589 
590         return reason;
591     }
592 
593     /**
594      * This is a utility function for sharing the base process of checking
595      * any XML name.
596      *
597      * @param name <code>String</code> to check for XML name compliance.
598      * @return <code>String</code> reason the name is illegal, or
599      *         <code>null</code> if OK.
600      */
601     public static String checkXMLName(String name) {
602         // Cannot be empty or null
603         if ((name == null) || (name.length() == 0) 
604                            || (name.trim().equals(""))) {
605             return "XML names cannot be null or empty";
606         }
607 
608       
609         // Cannot start with a number
610         char first = name.charAt(0);
611         if (!isXMLNameStartCharacter(first)) {
612             return "XML names cannot begin with the character \"" + 
613                    first + "\"";
614         }
615         // Ensure legal content for non-first chars
616         for (int i=1, len = name.length(); i<len; i++) {
617             char c = name.charAt(i);
618             if (!isXMLNameCharacter(c)) {
619                 return "XML names cannot contain the character \"" + c + "\"";
620             }
621         }
622 
623         // We got here, so everything is OK
624         return null;
625     }
626 
627    /**
628      * <p>
629      * Checks a string to see if it is a legal RFC 2396 URI.
630      * Both absolute and relative URIs are supported.
631      * </p>
632      *
633      * @param uri <code>String</code> to check.
634      * @return <code>String</code> reason the URI is illegal, or
635      *         <code>null</code> if OK.
636      */
637     public static String checkURI(String uri) {
638         // URIs can be null or empty
639         if ((uri == null) || (uri.equals(""))) {
640             return null;
641         }
642 
643         for (int i = 0; i < uri.length(); i++) {
644             char test = uri.charAt(i);
645             if (!isURICharacter(test)) {
646                 String msgNumber = "0x" + Integer.toHexString(test);
647                 if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
648                 return "URIs cannot contain " + msgNumber;
649             } // end if
650             if (test == '%') { // must be followed by two hexadecimal digits
651                    try {
652                        char firstDigit = uri.charAt(i+1);
653                        char secondDigit = uri.charAt(i+2);
654                        if (!isHexDigit(firstDigit) ||
655                            !isHexDigit(secondDigit)) {
656                            return "Percent signs in URIs must be followed by "
657                             + "exactly two hexadecimal digits.";
658                        }
659 
660                    }
661                    catch (StringIndexOutOfBoundsException e) {
662                        return "Percent signs in URIs must be followed by "
663                         + "exactly two hexadecimal digits.";
664                    }
665             }
666         } // end for
667 
668         // If we got here, everything is OK
669         return null;
670     }
671 
672    /**
673      * <p>
674      * <p>
675      * This is a utility function for determining whether a specified
676      * Unicode character is a hexadecimal digit as defined in RFC 2396;
677      * that is, one of the ASCII characters 0-9, a-f, or A-F
678      * </p>
679      *
680      * @param c  to check for hex digit.
681      * @return true if it's allowed, false otherwise.
682      */
683     public static boolean isHexDigit(char c) {
684 
685     // I suspect most characters passed to this method will be
686     // correct hexadecimal digits, so I test for the true cases
687     // first. If this proves to be a performance bottleneck
688     // a switch statement or lookup table
689     // might optimize this.
690         if (c >= '0' && c <= '9') return true;
691         if (c >= 'A' && c <= 'F') return true;
692         if (c >= 'a' && c <= 'f') return true;
693 
694         return false;
695     }
696 
697     /**
698      * <p>
699      * This is a utility function for determining whether
700      * a specified Unicode character is legal in URI references
701      * as determined by RFC 2396.
702      * </p>
703      *
704      * @param c <code>char</code> to check for URI reference compliance.
705      * @return true if it's allowed, false otherwise.
706      */
707     public static boolean isURICharacter(char c) {
708         if (c >= 'a' && c <= 'z') return true;
709         if (c >= 'A' && c <= 'Z') return true;
710         if (c >= '0' && c <= '9') return true;
711         if (c == '/') return true;
712         if (c == '-') return true;
713         if (c == '.') return true;
714         if (c == '?') return true;
715         if (c == ':') return true;
716         if (c == '@') return true;
717         if (c == '&') return true;
718         if (c == '=') return true;
719         if (c == '+') return true;
720         if (c == '$') return true;
721         if (c == ',') return true;
722         if (c == '%') return true;
723 
724         if (c == '_') return true;
725         if (c == '!') return true;
726         if (c == '~') return true;
727         if (c == '*') return true;
728         if (c == '\'') return true;
729         if (c == '(') return true;
730         if (c == ')') return true;
731         return false;
732     }
733     
734     /**
735      * This is a utility function for determining whether a specified 
736      * character is a character according to production 2 of the 
737      * XML 1.0 specification.
738      *
739      * @param c <code>char</code> to check for XML compliance
740      * @return <code>boolean</code> true if it's a character, 
741      *                                false otherwise
742      */
743     public static boolean isXMLCharacter(int c) {
744     
745         if (c == '\n') return true;
746         if (c == '\r') return true;
747         if (c == '\t') return true;
748         
749         if (c < 0x20) return false;  if (c <= 0xD7FF) return true;
750         if (c < 0xE000) return false;  if (c <= 0xFFFD) return true;
751         if (c < 0x10000) return false;  if (c <= 0x10FFFF) return true;
752         
753         return false;
754     }
755 
756 
757     /**
758      * This is a utility function for determining whether a specified 
759      * character is a name character according to production 4 of the 
760      * XML 1.0 specification.
761      *
762      * @param c <code>char</code> to check for XML name compliance.
763      * @return <code>boolean</code> true if it's a name character, 
764      *                                false otherwise.
765      */
766     public static boolean isXMLNameCharacter(char c) {
767     
768       return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-' 
769                              || c == '_' || c == ':' || isXMLCombiningChar(c) 
770                              || isXMLExtender(c));
771     }
772 
773     /**
774      * This is a utility function for determining whether a specified 
775      * character is a legal name start character according to production 5
776      * of the XML 1.0 specification. This production does allow names
777      * to begin with colons which the Namespaces in XML Recommendation
778      * disallows. 
779      *
780      * @param c <code>char</code> to check for XML name start compliance.
781      * @return <code>boolean</code> true if it's a name start character, 
782      *                                false otherwise.
783      */
784     public static boolean isXMLNameStartCharacter(char c) {
785     
786       return (isXMLLetter(c) || c == '_' || c ==':');
787     
788     }
789 
790     /**
791      * This is a utility function for determining whether a specified 
792      * character is a letter or digit according to productions 84 and 88
793      * of the XML 1.0 specification.
794      *
795      * @param c <code>char</code> to check.
796      * @return <code>boolean</code> true if it's letter or digit, 
797      *                                false otherwise.
798      */
799     public static boolean isXMLLetterOrDigit(char c) {
800     
801       return (isXMLLetter(c) || isXMLDigit(c));
802     
803     }
804 
805     /**
806      * This is a utility function for determining whether a specified character
807      * is a letter according to production 84 of the XML 1.0 specification.
808      *
809      * @param c <code>char</code> to check for XML name compliance.
810      * @return <code>String</code> true if it's a letter, false otherwise.
811      */
812     public static boolean isXMLLetter(char c) {
813         // Note that order is very important here.  The search proceeds 
814         // from lowest to highest values, so that no searching occurs 
815         // above the character's value.  BTW, the first line is equivalent to:
816         // if (c >= 0x0041 && c <= 0x005A) return true;
817 
818         if (c < 0x0041) return false;  if (c <= 0x005a) return true;
819         if (c < 0x0061) return false;  if (c <= 0x007A) return true;
820         if (c < 0x00C0) return false;  if (c <= 0x00D6) return true;
821         if (c < 0x00D8) return false;  if (c <= 0x00F6) return true;
822         if (c < 0x00F8) return false;  if (c <= 0x00FF) return true;
823         if (c < 0x0100) return false;  if (c <= 0x0131) return true;
824         if (c < 0x0134) return false;  if (c <= 0x013E) return true;
825         if (c < 0x0141) return false;  if (c <= 0x0148) return true;
826         if (c < 0x014A) return false;  if (c <= 0x017E) return true;
827         if (c < 0x0180) return false;  if (c <= 0x01C3) return true;
828         if (c < 0x01CD) return false;  if (c <= 0x01F0) return true;
829         if (c < 0x01F4) return false;  if (c <= 0x01F5) return true;
830         if (c < 0x01FA) return false;  if (c <= 0x0217) return true;
831         if (c < 0x0250) return false;  if (c <= 0x02A8) return true;
832         if (c < 0x02BB) return false;  if (c <= 0x02C1) return true;
833         if (c == 0x0386) return true;
834         if (c < 0x0388) return false;  if (c <= 0x038A) return true;
835         if (c == 0x038C) return true;
836         if (c < 0x038E) return false;  if (c <= 0x03A1) return true;
837         if (c < 0x03A3) return false;  if (c <= 0x03CE) return true;
838         if (c < 0x03D0) return false;  if (c <= 0x03D6) return true;
839         if (c == 0x03DA) return true;
840         if (c == 0x03DC) return true;
841         if (c == 0x03DE) return true;
842         if (c == 0x03E0) return true;
843         if (c < 0x03E2) return false;  if (c <= 0x03F3) return true;
844         if (c < 0x0401) return false;  if (c <= 0x040C) return true;
845         if (c < 0x040E) return false;  if (c <= 0x044F) return true;
846         if (c < 0x0451) return false;  if (c <= 0x045C) return true;
847         if (c < 0x045E) return false;  if (c <= 0x0481) return true;
848         if (c < 0x0490) return false;  if (c <= 0x04C4) return true;
849         if (c < 0x04C7) return false;  if (c <= 0x04C8) return true;
850         if (c < 0x04CB) return false;  if (c <= 0x04CC) return true;
851         if (c < 0x04D0) return false;  if (c <= 0x04EB) return true;
852         if (c < 0x04EE) return false;  if (c <= 0x04F5) return true;
853         if (c < 0x04F8) return false;  if (c <= 0x04F9) return true;
854         if (c < 0x0531) return false;  if (c <= 0x0556) return true;
855         if (c == 0x0559) return true;
856         if (c < 0x0561) return false;  if (c <= 0x0586) return true;
857         if (c < 0x05D0) return false;  if (c <= 0x05EA) return true;
858         if (c < 0x05F0) return false;  if (c <= 0x05F2) return true;
859         if (c < 0x0621) return false;  if (c <= 0x063A) return true;
860         if (c < 0x0641) return false;  if (c <= 0x064A) return true;
861         if (c < 0x0671) return false;  if (c <= 0x06B7) return true;
862         if (c < 0x06BA) return false;  if (c <= 0x06BE) return true;
863         if (c < 0x06C0) return false;  if (c <= 0x06CE) return true;
864         if (c < 0x06D0) return false;  if (c <= 0x06D3) return true;
865         if (c == 0x06D5) return true;
866         if (c < 0x06E5) return false;  if (c <= 0x06E6) return true;
867         if (c < 0x0905) return false;  if (c <= 0x0939) return true;
868         if (c == 0x093D) return true;
869         if (c < 0x0958) return false;  if (c <= 0x0961) return true;
870         if (c < 0x0985) return false;  if (c <= 0x098C) return true;
871         if (c < 0x098F) return false;  if (c <= 0x0990) return true;
872         if (c < 0x0993) return false;  if (c <= 0x09A8) return true;
873         if (c < 0x09AA) return false;  if (c <= 0x09B0) return true;
874         if (c == 0x09B2) return true;
875         if (c < 0x09B6) return false;  if (c <= 0x09B9) return true;
876         if (c < 0x09DC) return false;  if (c <= 0x09DD) return true;
877         if (c < 0x09DF) return false;  if (c <= 0x09E1) return true;
878         if (c < 0x09F0) return false;  if (c <= 0x09F1) return true;
879         if (c < 0x0A05) return false;  if (c <= 0x0A0A) return true;
880         if (c < 0x0A0F) return false;  if (c <= 0x0A10) return true;
881         if (c < 0x0A13) return false;  if (c <= 0x0A28) return true;
882         if (c < 0x0A2A) return false;  if (c <= 0x0A30) return true;
883         if (c < 0x0A32) return false;  if (c <= 0x0A33) return true;
884         if (c < 0x0A35) return false;  if (c <= 0x0A36) return true;
885         if (c < 0x0A38) return false;  if (c <= 0x0A39) return true;
886         if (c < 0x0A59) return false;  if (c <= 0x0A5C) return true;
887         if (c == 0x0A5E) return true;
888         if (c < 0x0A72) return false;  if (c <= 0x0A74) return true;
889         if (c < 0x0A85) return false;  if (c <= 0x0A8B) return true;
890         if (c == 0x0A8D) return true;
891         if (c < 0x0A8F) return false;  if (c <= 0x0A91) return true;
892         if (c < 0x0A93) return false;  if (c <= 0x0AA8) return true;
893         if (c < 0x0AAA) return false;  if (c <= 0x0AB0) return true;
894         if (c < 0x0AB2) return false;  if (c <= 0x0AB3) return true;
895         if (c < 0x0AB5) return false;  if (c <= 0x0AB9) return true;
896         if (c == 0x0ABD) return true;
897         if (c == 0x0AE0) return true;
898         if (c < 0x0B05) return false;  if (c <= 0x0B0C) return true;
899         if (c < 0x0B0F) return false;  if (c <= 0x0B10) return true;
900         if (c < 0x0B13) return false;  if (c <= 0x0B28) return true;
901         if (c < 0x0B2A) return false;  if (c <= 0x0B30) return true;
902         if (c < 0x0B32) return false;  if (c <= 0x0B33) return true;
903         if (c < 0x0B36) return false;  if (c <= 0x0B39) return true;
904         if (c == 0x0B3D) return true;
905         if (c < 0x0B5C) return false;  if (c <= 0x0B5D) return true;
906         if (c < 0x0B5F) return false;  if (c <= 0x0B61) return true;
907         if (c < 0x0B85) return false;  if (c <= 0x0B8A) return true;
908         if (c < 0x0B8E) return false;  if (c <= 0x0B90) return true;
909         if (c < 0x0B92) return false;  if (c <= 0x0B95) return true;
910         if (c < 0x0B99) return false;  if (c <= 0x0B9A) return true;
911         if (c == 0x0B9C) return true;
912         if (c < 0x0B9E) return false;  if (c <= 0x0B9F) return true;
913         if (c < 0x0BA3) return false;  if (c <= 0x0BA4) return true;
914         if (c < 0x0BA8) return false;  if (c <= 0x0BAA) return true;
915         if (c < 0x0BAE) return false;  if (c <= 0x0BB5) return true;
916         if (c < 0x0BB7) return false;  if (c <= 0x0BB9) return true;
917         if (c < 0x0C05) return false;  if (c <= 0x0C0C) return true;
918         if (c < 0x0C0E) return false;  if (c <= 0x0C10) return true;
919         if (c < 0x0C12) return false;  if (c <= 0x0C28) return true;
920         if (c < 0x0C2A) return false;  if (c <= 0x0C33) return true;
921         if (c < 0x0C35) return false;  if (c <= 0x0C39) return true;
922         if (c < 0x0C60) return false;  if (c <= 0x0C61) return true;
923         if (c < 0x0C85) return false;  if (c <= 0x0C8C) return true;
924         if (c < 0x0C8E) return false;  if (c <= 0x0C90) return true;
925         if (c < 0x0C92) return false;  if (c <= 0x0CA8) return true;
926         if (c < 0x0CAA) return false;  if (c <= 0x0CB3) return true;
927         if (c < 0x0CB5) return false;  if (c <= 0x0CB9) return true;
928         if (c == 0x0CDE) return true;
929         if (c < 0x0CE0) return false;  if (c <= 0x0CE1) return true;
930         if (c < 0x0D05) return false;  if (c <= 0x0D0C) return true;
931         if (c < 0x0D0E) return false;  if (c <= 0x0D10) return true;
932         if (c < 0x0D12) return false;  if (c <= 0x0D28) return true;
933         if (c < 0x0D2A) return false;  if (c <= 0x0D39) return true;
934         if (c < 0x0D60) return false;  if (c <= 0x0D61) return true;
935         if (c < 0x0E01) return false;  if (c <= 0x0E2E) return true;
936         if (c == 0x0E30) return true;
937         if (c < 0x0E32) return false;  if (c <= 0x0E33) return true;
938         if (c < 0x0E40) return false;  if (c <= 0x0E45) return true;
939         if (c < 0x0E81) return false;  if (c <= 0x0E82) return true;
940         if (c == 0x0E84) return true;
941         if (c < 0x0E87) return false;  if (c <= 0x0E88) return true;
942         if (c == 0x0E8A) return true;
943         if (c == 0x0E8D) return true;
944         if (c < 0x0E94) return false;  if (c <= 0x0E97) return true;
945         if (c < 0x0E99) return false;  if (c <= 0x0E9F) return true;
946         if (c < 0x0EA1) return false;  if (c <= 0x0EA3) return true;
947         if (c == 0x0EA5) return true;
948         if (c == 0x0EA7) return true;
949         if (c < 0x0EAA) return false;  if (c <= 0x0EAB) return true;
950         if (c < 0x0EAD) return false;  if (c <= 0x0EAE) return true;
951         if (c == 0x0EB0) return true;
952         if (c < 0x0EB2) return false;  if (c <= 0x0EB3) return true;
953         if (c == 0x0EBD) return true;
954         if (c < 0x0EC0) return false;  if (c <= 0x0EC4) return true;
955         if (c < 0x0F40) return false;  if (c <= 0x0F47) return true;
956         if (c < 0x0F49) return false;  if (c <= 0x0F69) return true;
957         if (c < 0x10A0) return false;  if (c <= 0x10C5) return true;
958         if (c < 0x10D0) return false;  if (c <= 0x10F6) return true;
959         if (c == 0x1100) return true;
960         if (c < 0x1102) return false;  if (c <= 0x1103) return true;
961         if (c < 0x1105) return false;  if (c <= 0x1107) return true;
962         if (c == 0x1109) return true;
963         if (c < 0x110B) return false;  if (c <= 0x110C) return true;
964         if (c < 0x110E) return false;  if (c <= 0x1112) return true;
965         if (c == 0x113C) return true;
966         if (c == 0x113E) return true;
967         if (c == 0x1140) return true;
968         if (c == 0x114C) return true;
969         if (c == 0x114E) return true;
970         if (c == 0x1150) return true;
971         if (c < 0x1154) return false;  if (c <= 0x1155) return true;
972         if (c == 0x1159) return true;
973         if (c < 0x115F) return false;  if (c <= 0x1161) return true;
974         if (c == 0x1163) return true;
975         if (c == 0x1165) return true;
976         if (c == 0x1167) return true;
977         if (c == 0x1169) return true;
978         if (c < 0x116D) return false;  if (c <= 0x116E) return true;
979         if (c < 0x1172) return false;  if (c <= 0x1173) return true;
980         if (c == 0x1175) return true;
981         if (c == 0x119E) return true;
982         if (c == 0x11A8) return true;
983         if (c == 0x11AB) return true;
984         if (c < 0x11AE) return false;  if (c <= 0x11AF) return true;
985         if (c < 0x11B7) return false;  if (c <= 0x11B8) return true;
986         if (c == 0x11BA) return true;
987         if (c < 0x11BC) return false;  if (c <= 0x11C2) return true;
988         if (c == 0x11EB) return true;
989         if (c == 0x11F0) return true;
990         if (c == 0x11F9) return true;
991         if (c < 0x1E00) return false;  if (c <= 0x1E9B) return true;
992         if (c < 0x1EA0) return false;  if (c <= 0x1EF9) return true;
993         if (c < 0x1F00) return false;  if (c <= 0x1F15) return true;
994         if (c < 0x1F18) return false;  if (c <= 0x1F1D) return true;
995         if (c < 0x1F20) return false;  if (c <= 0x1F45) return true;
996         if (c < 0x1F48) return false;  if (c <= 0x1F4D) return true;
997         if (c < 0x1F50) return false;  if (c <= 0x1F57) return true;
998         if (c == 0x1F59) return true;
999         if (c == 0x1F5B) return true;
1000        if (c == 0x1F5D) return true;
1001        if (c < 0x1F5F) return false;  if (c <= 0x1F7D) return true;
1002        if (c < 0x1F80) return false;  if (c <= 0x1FB4) return true;
1003        if (c < 0x1FB6) return false;  if (c <= 0x1FBC) return true;
1004        if (c == 0x1FBE) return true;
1005        if (c < 0x1FC2) return false;  if (c <= 0x1FC4) return true;
1006        if (c < 0x1FC6) return false;  if (c <= 0x1FCC) return true;
1007        if (c < 0x1FD0) return false;  if (c <= 0x1FD3) return true;
1008        if (c < 0x1FD6) return false;  if (c <= 0x1FDB) return true;
1009        if (c < 0x1FE0) return false;  if (c <= 0x1FEC) return true;
1010        if (c < 0x1FF2) return false;  if (c <= 0x1FF4) return true;
1011        if (c < 0x1FF6) return false;  if (c <= 0x1FFC) return true;
1012        if (c == 0x2126) return true;
1013        if (c < 0x212A) return false;  if (c <= 0x212B) return true;
1014        if (c == 0x212E) return true;
1015        if (c < 0x2180) return false;  if (c <= 0x2182) return true;
1016        if (c == 0x3007) return true;                          // ideographic
1017        if (c < 0x3021) return false;  if (c <= 0x3029) return true;  // ideo
1018        if (c < 0x3041) return false;  if (c <= 0x3094) return true;
1019        if (c < 0x30A1) return false;  if (c <= 0x30FA) return true;
1020        if (c < 0x3105) return false;  if (c <= 0x312C) return true;
1021        if (c < 0x4E00) return false;  if (c <= 0x9FA5) return true;  // ideo
1022        if (c < 0xAC00) return false;  if (c <= 0xD7A3) return true;
1023      
1024        return false;
1025        
1026    }
1027
1028    /**
1029     * This is a utility function for determining whether a specified character
1030     * is a combining character according to production 87
1031     * of the XML 1.0 specification.
1032     *
1033     * @param c <code>char</code> to check.
1034     * @return <code>boolean</code> true if it's a combining character,
1035     *         false otherwise.
1036     */
1037    public static boolean isXMLCombiningChar(char c) {
1038        // CombiningChar
1039        if (c < 0x0300) return false;  if (c <= 0x0345) return true;
1040        if (c < 0x0360) return false;  if (c <= 0x0361) return true;
1041        if (c < 0x0483) return false;  if (c <= 0x0486) return true;
1042        if (c < 0x0591) return false;  if (c <= 0x05A1) return true;
1043                                       
1044        if (c < 0x05A3) return false;  if (c <= 0x05B9) return true;
1045        if (c < 0x05BB) return false;  if (c <= 0x05BD) return true;
1046        if (c == 0x05BF) return true;
1047        if (c < 0x05C1) return false;  if (c <= 0x05C2) return true;
1048                                       
1049        if (c == 0x05C4) return true;
1050        if (c < 0x064B) return false;  if (c <= 0x0652) return true;
1051        if (c == 0x0670) return true;
1052        if (c < 0x06D6) return false;  if (c <= 0x06DC) return true;
1053                                       
1054        if (c < 0x06DD) return false;  if (c <= 0x06DF) return true;
1055        if (c < 0x06E0) return false;  if (c <= 0x06E4) return true;
1056        if (c < 0x06E7) return false;  if (c <= 0x06E8) return true;
1057                                       
1058        if (c < 0x06EA) return false;  if (c <= 0x06ED) return true;
1059        if (c < 0x0901) return false;  if (c <= 0x0903) return true;
1060        if (c == 0x093C) return true;
1061        if (c < 0x093E) return false;  if (c <= 0x094C) return true;
1062                                       
1063        if (c == 0x094D) return true;
1064        if (c < 0x0951) return false;  if (c <= 0x0954) return true;
1065        if (c < 0x0962) return false;  if (c <= 0x0963) return true;
1066        if (c < 0x0981) return false;  if (c <= 0x0983) return true;
1067                                       
1068        if (c == 0x09BC) return true;
1069        if (c == 0x09BE) return true;
1070        if (c == 0x09BF) return true;
1071        if (c < 0x09C0) return false;  if (c <= 0x09C4) return true;
1072        if (c < 0x09C7) return false;  if (c <= 0x09C8) return true;
1073                                       
1074        if (c < 0x09CB) return false;  if (c <= 0x09CD) return true;
1075        if (c == 0x09D7) return true;
1076        if (c < 0x09E2) return false;  if (c <= 0x09E3) return true;
1077        if (c == 0x0A02) return true;
1078        if (c == 0x0A3C) return true;
1079                                       
1080        if (c == 0x0A3E) return true;
1081        if (c == 0x0A3F) return true;
1082        if (c < 0x0A40) return false;  if (c <= 0x0A42) return true;
1083        if (c < 0x0A47) return false;  if (c <= 0x0A48) return true;
1084                                       
1085        if (c < 0x0A4B) return false;  if (c <= 0x0A4D) return true;
1086        if (c < 0x0A70) return false;  if (c <= 0x0A71) return true;
1087        if (c < 0x0A81) return false;  if (c <= 0x0A83) return true;
1088        if (c == 0x0ABC) return true;
1089                                       
1090        if (c < 0x0ABE) return false;  if (c <= 0x0AC5) return true;
1091        if (c < 0x0AC7) return false;  if (c <= 0x0AC9) return true;
1092        if (c < 0x0ACB) return false;  if (c <= 0x0ACD) return true;
1093                                       
1094        if (c < 0x0B01) return false;  if (c <= 0x0B03) return true;
1095        if (c == 0x0B3C) return true;
1096        if (c < 0x0B3E) return false;  if (c <= 0x0B43) return true;
1097        if (c < 0x0B47) return false;  if (c <= 0x0B48) return true;
1098                                       
1099        if (c < 0x0B4B) return false;  if (c <= 0x0B4D) return true;
1100        if (c < 0x0B56) return false;  if (c <= 0x0B57) return true;
1101        if (c < 0x0B82) return false;  if (c <= 0x0B83) return true;
1102                                       
1103        if (c < 0x0BBE) return false;  if (c <= 0x0BC2) return true;
1104        if (c < 0x0BC6) return false;  if (c <= 0x0BC8) return true;
1105        if (c < 0x0BCA) return false;  if (c <= 0x0BCD) return true;
1106        if (c == 0x0BD7) return true;
1107                                       
1108        if (c < 0x0C01) return false;  if (c <= 0x0C03) return true;
1109        if (c < 0x0C3E) return false;  if (c <= 0x0C44) return true;
1110        if (c < 0x0C46) return false;  if (c <= 0x0C48) return true;
1111                                       
1112        if (c < 0x0C4A) return false;  if (c <= 0x0C4D) return true;
1113        if (c < 0x0C55) return false;  if (c <= 0x0C56) return true;
1114        if (c < 0x0C82) return false;  if (c <= 0x0C83) return true;
1115                                       
1116        if (c < 0x0CBE) return false;  if (c <= 0x0CC4) return true;
1117        if (c < 0x0CC6) return false;  if (c <= 0x0CC8) return true;
1118        if (c < 0x0CCA) return false;  if (c <= 0x0CCD) return true;
1119                                       
1120        if (c < 0x0CD5) return false;  if (c <= 0x0CD6) return true;
1121        if (c < 0x0D02) return false;  if (c <= 0x0D03) return true;
1122        if (c < 0x0D3E) return false;  if (c <= 0x0D43) return true;
1123                                       
1124        if (c < 0x0D46) return false;  if (c <= 0x0D48) return true;
1125        if (c < 0x0D4A) return false;  if (c <= 0x0D4D) return true;
1126        if (c == 0x0D57) return true;
1127        if (c == 0x0E31) return true;
1128                                       
1129        if (c < 0x0E34) return false;  if (c <= 0x0E3A) return true;
1130        if (c < 0x0E47) return false;  if (c <= 0x0E4E) return true;
1131        if (c == 0x0EB1) return true;
1132        if (c < 0x0EB4) return false;  if (c <= 0x0EB9) return true;
1133                                       
1134        if (c < 0x0EBB) return false;  if (c <= 0x0EBC) return true;
1135        if (c < 0x0EC8) return false;  if (c <= 0x0ECD) return true;
1136        if (c < 0x0F18) return false;  if (c <= 0x0F19) return true;
1137        if (c == 0x0F35) return true;
1138                                       
1139        if (c == 0x0F37) return true;
1140        if (c == 0x0F39) return true;
1141        if (c == 0x0F3E) return true;
1142        if (c == 0x0F3F) return true;
1143        if (c < 0x0F71) return false;  if (c <= 0x0F84) return true;
1144                                       
1145        if (c < 0x0F86) return false;  if (c <= 0x0F8B) return true;
1146        if (c < 0x0F90) return false;  if (c <= 0x0F95) return true;
1147        if (c == 0x0F97) return true;
1148        if (c < 0x0F99) return false;  if (c <= 0x0FAD) return true;
1149                                       
1150        if (c < 0x0FB1) return false;  if (c <= 0x0FB7) return true;
1151        if (c == 0x0FB9) return true;
1152        if (c < 0x20D0) return false;  if (c <= 0x20DC) return true;
1153        if (c == 0x20E1) return true;
1154                                       
1155        if (c < 0x302A) return false;  if (c <= 0x302F) return true;
1156        if (c == 0x3099) return true;
1157        if (c == 0x309A) return true; 
1158        
1159        return false;
1160        
1161    }
1162    
1163    /**
1164     * This is a utility function for determining whether a specified 
1165     * character is an extender according to production 88 of the XML 1.0
1166     * specification.
1167     *
1168     * @param c <code>char</code> to check.
1169     * @return <code>String</code> true if it's an extender, false otherwise.
1170     */
1171    public static boolean isXMLExtender(char c) {
1172
1173        if (c < 0x00B6) return false;  // quick short circuit
1174
1175        // Extenders                               
1176        if (c == 0x00B7) return true;
1177        if (c == 0x02D0) return true;
1178        if (c == 0x02D1) return true;
1179        if (c == 0x0387) return true;
1180        if (c == 0x0640) return true;
1181        if (c == 0x0E46) return true;
1182        if (c == 0x0EC6) return true;
1183        if (c == 0x3005) return true;
1184                                       
1185        if (c < 0x3031) return false;  if (c <= 0x3035) return true;
1186        if (c < 0x309D) return false;  if (c <= 0x309E) return true;
1187        if (c < 0x30FC) return false;  if (c <= 0x30FE) return true;
1188        
1189        return false;
1190        
1191    }
1192      
1193    /**
1194     * This is a utility function for determining whether a specified 
1195     * Unicode character
1196     * is a digit according to production 88 of the XML 1.0 specification.
1197     *
1198     * @param c <code>char</code> to check for XML digit compliance
1199     * @return <code>boolean</code> true if it's a digit, false otherwise
1200     */
1201    public static boolean isXMLDigit(char c) {
1202      
1203        if (c < 0x0030) return false;  if (c <= 0x0039) return true;
1204        if (c < 0x0660) return false;  if (c <= 0x0669) return true;
1205        if (c < 0x06F0) return false;  if (c <= 0x06F9) return true;
1206        if (c < 0x0966) return false;  if (c <= 0x096F) return true;
1207                                       
1208        if (c < 0x09E6) return false;  if (c <= 0x09EF) return true;
1209        if (c < 0x0A66) return false;  if (c <= 0x0A6F) return true;
1210        if (c < 0x0AE6) return false;  if (c <= 0x0AEF) return true;
1211                                       
1212        if (c < 0x0B66) return false;  if (c <= 0x0B6F) return true;
1213        if (c < 0x0BE7) return false;  if (c <= 0x0BEF) return true;
1214        if (c < 0x0C66) return false;  if (c <= 0x0C6F) return true;
1215                                       
1216        if (c < 0x0CE6) return false;  if (c <= 0x0CEF) return true;
1217        if (c < 0x0D66) return false;  if (c <= 0x0D6F) return true;
1218        if (c < 0x0E50) return false;  if (c <= 0x0E59) return true;
1219                                       
1220        if (c < 0x0ED0) return false;  if (c <= 0x0ED9) return true;
1221        if (c < 0x0F20) return false;  if (c <= 0x0F29) return true; 
1222      
1223        return false;
1224    }  
1225    
1226}