Save This Page
Home » openjdk-7 » java » lang » [javadoc | source]
    1   /*
    2    * Copyright 2002-2006 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   
   26   package java.lang;
   27   import java.util.Map;
   28   import java.util.HashMap;
   29   import java.util.Locale;
   30   
   31   /**
   32    * The <code>Character</code> class wraps a value of the primitive
   33    * type <code>char</code> in an object. An object of type
   34    * <code>Character</code> contains a single field whose type is
   35    * <code>char</code>.
   36    * <p>
   37    * In addition, this class provides several methods for determining
   38    * a character's category (lowercase letter, digit, etc.) and for converting
   39    * characters from uppercase to lowercase and vice versa.
   40    * <p>
   41    * Character information is based on the Unicode Standard, version 4.0.
   42    * <p>
   43    * The methods and data of class <code>Character</code> are defined by
   44    * the information in the <i>UnicodeData</i> file that is part of the
   45    * Unicode Character Database maintained by the Unicode
   46    * Consortium. This file specifies various properties including name
   47    * and general category for every defined Unicode code point or
   48    * character range.
   49    * <p>
   50    * The file and its description are available from the Unicode Consortium at:
   51    * <ul>
   52    * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
   53    * </ul>
   54    *
   55    * <h4><a name="unicode">Unicode Character Representations</a></h4>
   56    *
   57    * <p>The <code>char</code> data type (and therefore the value that a
   58    * <code>Character</code> object encapsulates) is based on the
   59    * original Unicode specification, which defined characters as
   60    * fixed-width 16-bit entities. The Unicode standard has since been
   61    * changed to allow for characters whose representation requires more
   62    * than 16 bits.  The range of legal <em>code point</em>s is now
   63    * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
   64    * (Refer to the <a
   65    * href="http://www.unicode.org/reports/tr27/#notation"><i>
   66    * definition</i></a> of the U+<i>n</i> notation in the Unicode
   67    * standard.)
   68    *
   69    * <p>The set of characters from U+0000 to U+FFFF is sometimes
   70    * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
   71    * name="supplementary">Characters</a> whose code points are greater
   72    * than U+FFFF are called <em>supplementary character</em>s.  The Java
   73    * 2 platform uses the UTF-16 representation in <code>char</code>
   74    * arrays and in the <code>String</code> and <code>StringBuffer</code>
   75    * classes. In this representation, supplementary characters are
   76    * represented as a pair of <code>char</code> values, the first from
   77    * the <em>high-surrogates</em> range (&#92;uD800-&#92;uDBFF), the
   78    * second from the <em>low-surrogates</em> range
   79    * (&#92;uDC00-&#92;uDFFF).
   80    *
   81    * <p>A <code>char</code> value, therefore, represents Basic
   82    * Multilingual Plane (BMP) code points, including the surrogate
   83    * code points, or code units of the UTF-16 encoding. An
   84    * <code>int</code> value represents all Unicode code points,
   85    * including supplementary code points. The lower (least significant)
   86    * 21 bits of <code>int</code> are used to represent Unicode code
   87    * points and the upper (most significant) 11 bits must be zero.
   88    * Unless otherwise specified, the behavior with respect to
   89    * supplementary characters and surrogate <code>char</code> values is
   90    * as follows:
   91    *
   92    * <ul>
   93    * <li>The methods that only accept a <code>char</code> value cannot support
   94    * supplementary characters. They treat <code>char</code> values from the
   95    * surrogate ranges as undefined characters. For example,
   96    * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
   97    * this specific value, if followed by any low-surrogate value in a string,
   98    * would represent a letter.
   99    *
  100    * <li>The methods that accept an <code>int</code> value support all
  101    * Unicode characters, including supplementary characters. For
  102    * example, <code>Character.isLetter(0x2F81A)</code> returns
  103    * <code>true</code> because the code point value represents a letter
  104    * (a CJK ideograph).
  105    * </ul>
  106    *
  107    * <p>In the Java SE API documentation, <em>Unicode code point</em> is
  108    * used for character values in the range between U+0000 and U+10FFFF,
  109    * and <em>Unicode code unit</em> is used for 16-bit
  110    * <code>char</code> values that are code units of the <em>UTF-16</em>
  111    * encoding. For more information on Unicode terminology, refer to the
  112    * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
  113    *
  114    * @author  Lee Boynton
  115    * @author  Guy Steele
  116    * @author  Akira Tanaka
  117    * @since   1.0
  118    */
  119   public final
  120   class Character extends Object implements java.io.Serializable, Comparable<Character> {
  121       /**
  122        * The minimum radix available for conversion to and from strings.
  123        * The constant value of this field is the smallest value permitted
  124        * for the radix argument in radix-conversion methods such as the
  125        * <code>digit</code> method, the <code>forDigit</code>
  126        * method, and the <code>toString</code> method of class
  127        * <code>Integer</code>.
  128        *
  129        * @see     java.lang.Character#digit(char, int)
  130        * @see     java.lang.Character#forDigit(int, int)
  131        * @see     java.lang.Integer#toString(int, int)
  132        * @see     java.lang.Integer#valueOf(java.lang.String)
  133        */
  134       public static final int MIN_RADIX = 2;
  135   
  136       /**
  137        * The maximum radix available for conversion to and from strings.
  138        * The constant value of this field is the largest value permitted
  139        * for the radix argument in radix-conversion methods such as the
  140        * <code>digit</code> method, the <code>forDigit</code>
  141        * method, and the <code>toString</code> method of class
  142        * <code>Integer</code>.
  143        *
  144        * @see     java.lang.Character#digit(char, int)
  145        * @see     java.lang.Character#forDigit(int, int)
  146        * @see     java.lang.Integer#toString(int, int)
  147        * @see     java.lang.Integer#valueOf(java.lang.String)
  148        */
  149       public static final int MAX_RADIX = 36;
  150   
  151       /**
  152        * The constant value of this field is the smallest value of type
  153        * <code>char</code>, <code>'&#92;u0000'</code>.
  154        *
  155        * @since   1.0.2
  156        */
  157       public static final char   MIN_VALUE = '\u0000';
  158   
  159       /**
  160        * The constant value of this field is the largest value of type
  161        * <code>char</code>, <code>'&#92;uFFFF'</code>.
  162        *
  163        * @since   1.0.2
  164        */
  165       public static final char   MAX_VALUE = '\uffff';
  166   
  167       /**
  168        * The <code>Class</code> instance representing the primitive type
  169        * <code>char</code>.
  170        *
  171        * @since   1.1
  172        */
  173       public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
  174   
  175      /*
  176       * Normative general types
  177       */
  178   
  179      /*
  180       * General character types
  181       */
  182   
  183      /**
  184       * General category "Cn" in the Unicode specification.
  185       * @since   1.1
  186       */
  187       public static final byte
  188           UNASSIGNED                  = 0;
  189   
  190      /**
  191       * General category "Lu" in the Unicode specification.
  192       * @since   1.1
  193       */
  194       public static final byte
  195           UPPERCASE_LETTER            = 1;
  196   
  197      /**
  198       * General category "Ll" in the Unicode specification.
  199       * @since   1.1
  200       */
  201       public static final byte
  202           LOWERCASE_LETTER            = 2;
  203   
  204      /**
  205       * General category "Lt" in the Unicode specification.
  206       * @since   1.1
  207       */
  208       public static final byte
  209           TITLECASE_LETTER            = 3;
  210   
  211      /**
  212       * General category "Lm" in the Unicode specification.
  213       * @since   1.1
  214       */
  215       public static final byte
  216           MODIFIER_LETTER             = 4;
  217   
  218      /**
  219       * General category "Lo" in the Unicode specification.
  220       * @since   1.1
  221       */
  222       public static final byte
  223           OTHER_LETTER                = 5;
  224   
  225      /**
  226       * General category "Mn" in the Unicode specification.
  227       * @since   1.1
  228       */
  229       public static final byte
  230           NON_SPACING_MARK            = 6;
  231   
  232      /**
  233       * General category "Me" in the Unicode specification.
  234       * @since   1.1
  235       */
  236       public static final byte
  237           ENCLOSING_MARK              = 7;
  238   
  239      /**
  240       * General category "Mc" in the Unicode specification.
  241       * @since   1.1
  242       */
  243       public static final byte
  244           COMBINING_SPACING_MARK      = 8;
  245   
  246      /**
  247       * General category "Nd" in the Unicode specification.
  248       * @since   1.1
  249       */
  250       public static final byte
  251           DECIMAL_DIGIT_NUMBER        = 9;
  252   
  253      /**
  254       * General category "Nl" in the Unicode specification.
  255       * @since   1.1
  256       */
  257       public static final byte
  258           LETTER_NUMBER               = 10;
  259   
  260      /**
  261       * General category "No" in the Unicode specification.
  262       * @since   1.1
  263       */
  264       public static final byte
  265           OTHER_NUMBER                = 11;
  266   
  267      /**
  268       * General category "Zs" in the Unicode specification.
  269       * @since   1.1
  270       */
  271       public static final byte
  272           SPACE_SEPARATOR             = 12;
  273   
  274      /**
  275       * General category "Zl" in the Unicode specification.
  276       * @since   1.1
  277       */
  278       public static final byte
  279           LINE_SEPARATOR              = 13;
  280   
  281      /**
  282       * General category "Zp" in the Unicode specification.
  283       * @since   1.1
  284       */
  285       public static final byte
  286           PARAGRAPH_SEPARATOR         = 14;
  287   
  288      /**
  289       * General category "Cc" in the Unicode specification.
  290       * @since   1.1
  291       */
  292       public static final byte
  293           CONTROL                     = 15;
  294   
  295      /**
  296       * General category "Cf" in the Unicode specification.
  297       * @since   1.1
  298       */
  299       public static final byte
  300           FORMAT                      = 16;
  301   
  302      /**
  303       * General category "Co" in the Unicode specification.
  304       * @since   1.1
  305       */
  306       public static final byte
  307           PRIVATE_USE                 = 18;
  308   
  309      /**
  310       * General category "Cs" in the Unicode specification.
  311       * @since   1.1
  312       */
  313       public static final byte
  314           SURROGATE                   = 19;
  315   
  316      /**
  317       * General category "Pd" in the Unicode specification.
  318       * @since   1.1
  319       */
  320       public static final byte
  321           DASH_PUNCTUATION            = 20;
  322   
  323      /**
  324       * General category "Ps" in the Unicode specification.
  325       * @since   1.1
  326       */
  327       public static final byte
  328           START_PUNCTUATION           = 21;
  329   
  330      /**
  331       * General category "Pe" in the Unicode specification.
  332       * @since   1.1
  333       */
  334       public static final byte
  335           END_PUNCTUATION             = 22;
  336   
  337      /**
  338       * General category "Pc" in the Unicode specification.
  339       * @since   1.1
  340       */
  341       public static final byte
  342           CONNECTOR_PUNCTUATION       = 23;
  343   
  344      /**
  345       * General category "Po" in the Unicode specification.
  346       * @since   1.1
  347       */
  348       public static final byte
  349           OTHER_PUNCTUATION           = 24;
  350   
  351      /**
  352       * General category "Sm" in the Unicode specification.
  353       * @since   1.1
  354       */
  355       public static final byte
  356           MATH_SYMBOL                 = 25;
  357   
  358      /**
  359       * General category "Sc" in the Unicode specification.
  360       * @since   1.1
  361       */
  362       public static final byte
  363           CURRENCY_SYMBOL             = 26;
  364   
  365      /**
  366       * General category "Sk" in the Unicode specification.
  367       * @since   1.1
  368       */
  369       public static final byte
  370           MODIFIER_SYMBOL             = 27;
  371   
  372      /**
  373       * General category "So" in the Unicode specification.
  374       * @since   1.1
  375       */
  376       public static final byte
  377           OTHER_SYMBOL                = 28;
  378   
  379      /**
  380       * General category "Pi" in the Unicode specification.
  381       * @since   1.4
  382       */
  383       public static final byte
  384           INITIAL_QUOTE_PUNCTUATION   = 29;
  385   
  386      /**
  387       * General category "Pf" in the Unicode specification.
  388       * @since   1.4
  389       */
  390       public static final byte
  391           FINAL_QUOTE_PUNCTUATION     = 30;
  392   
  393       /**
  394        * Error flag. Use int (code point) to avoid confusion with U+FFFF.
  395        */
  396        static final int ERROR = 0xFFFFFFFF;
  397   
  398   
  399       /**
  400        * Undefined bidirectional character type. Undefined <code>char</code>
  401        * values have undefined directionality in the Unicode specification.
  402        * @since 1.4
  403        */
  404        public static final byte DIRECTIONALITY_UNDEFINED = -1;
  405   
  406       /**
  407        * Strong bidirectional character type "L" in the Unicode specification.
  408        * @since 1.4
  409        */
  410       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
  411   
  412       /**
  413        * Strong bidirectional character type "R" in the Unicode specification.
  414        * @since 1.4
  415        */
  416       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
  417   
  418       /**
  419        * Strong bidirectional character type "AL" in the Unicode specification.
  420        * @since 1.4
  421        */
  422       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
  423   
  424       /**
  425        * Weak bidirectional character type "EN" in the Unicode specification.
  426        * @since 1.4
  427        */
  428       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
  429   
  430       /**
  431        * Weak bidirectional character type "ES" in the Unicode specification.
  432        * @since 1.4
  433        */
  434       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
  435   
  436       /**
  437        * Weak bidirectional character type "ET" in the Unicode specification.
  438        * @since 1.4
  439        */
  440       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
  441   
  442       /**
  443        * Weak bidirectional character type "AN" in the Unicode specification.
  444        * @since 1.4
  445        */
  446       public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
  447   
  448       /**
  449        * Weak bidirectional character type "CS" in the Unicode specification.
  450        * @since 1.4
  451        */
  452       public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
  453   
  454       /**
  455        * Weak bidirectional character type "NSM" in the Unicode specification.
  456        * @since 1.4
  457        */
  458       public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
  459   
  460       /**
  461        * Weak bidirectional character type "BN" in the Unicode specification.
  462        * @since 1.4
  463        */
  464       public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
  465   
  466       /**
  467        * Neutral bidirectional character type "B" in the Unicode specification.
  468        * @since 1.4
  469        */
  470       public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
  471   
  472       /**
  473        * Neutral bidirectional character type "S" in the Unicode specification.
  474        * @since 1.4
  475        */
  476       public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
  477   
  478       /**
  479        * Neutral bidirectional character type "WS" in the Unicode specification.
  480        * @since 1.4
  481        */
  482       public static final byte DIRECTIONALITY_WHITESPACE = 12;
  483   
  484       /**
  485        * Neutral bidirectional character type "ON" in the Unicode specification.
  486        * @since 1.4
  487        */
  488       public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
  489   
  490       /**
  491        * Strong bidirectional character type "LRE" in the Unicode specification.
  492        * @since 1.4
  493        */
  494       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
  495   
  496       /**
  497        * Strong bidirectional character type "LRO" in the Unicode specification.
  498        * @since 1.4
  499        */
  500       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
  501   
  502       /**
  503        * Strong bidirectional character type "RLE" in the Unicode specification.
  504        * @since 1.4
  505        */
  506       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
  507   
  508       /**
  509        * Strong bidirectional character type "RLO" in the Unicode specification.
  510        * @since 1.4
  511        */
  512       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
  513   
  514       /**
  515        * Weak bidirectional character type "PDF" in the Unicode specification.
  516        * @since 1.4
  517        */
  518       public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
  519   
  520       /**
  521        * The minimum value of a Unicode high-surrogate code unit in the
  522        * UTF-16 encoding. A high-surrogate is also known as a
  523        * <i>leading-surrogate</i>.
  524        *
  525        * @since 1.5
  526        */
  527       public static final char MIN_HIGH_SURROGATE = '\uD800';
  528   
  529       /**
  530        * The maximum value of a Unicode high-surrogate code unit in the
  531        * UTF-16 encoding. A high-surrogate is also known as a
  532        * <i>leading-surrogate</i>.
  533        *
  534        * @since 1.5
  535        */
  536       public static final char MAX_HIGH_SURROGATE = '\uDBFF';
  537   
  538       /**
  539        * The minimum value of a Unicode low-surrogate code unit in the
  540        * UTF-16 encoding. A low-surrogate is also known as a
  541        * <i>trailing-surrogate</i>.
  542        *
  543        * @since 1.5
  544        */
  545       public static final char MIN_LOW_SURROGATE  = '\uDC00';
  546   
  547       /**
  548        * The maximum value of a Unicode low-surrogate code unit in the
  549        * UTF-16 encoding. A low-surrogate is also known as a
  550        * <i>trailing-surrogate</i>.
  551        *
  552        * @since 1.5
  553        */
  554       public static final char MAX_LOW_SURROGATE  = '\uDFFF';
  555   
  556       /**
  557        * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
  558        *
  559        * @since 1.5
  560        */
  561       public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
  562   
  563       /**
  564        * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
  565        *
  566        * @since 1.5
  567        */
  568       public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
  569   
  570       /**
  571        * The minimum value of a supplementary code point.
  572        *
  573        * @since 1.5
  574        */
  575       public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
  576   
  577       /**
  578        * The minimum value of a Unicode code point.
  579        *
  580        * @since 1.5
  581        */
  582       public static final int MIN_CODE_POINT = 0x000000;
  583   
  584       /**
  585        * The maximum value of a Unicode code point.
  586        *
  587        * @since 1.5
  588        */
  589       public static final int MAX_CODE_POINT = 0x10ffff;
  590   
  591   
  592       /**
  593        * Instances of this class represent particular subsets of the Unicode
  594        * character set.  The only family of subsets defined in the
  595        * <code>Character</code> class is <code>{@link Character.UnicodeBlock
  596        * UnicodeBlock}</code>.  Other portions of the Java API may define other
  597        * subsets for their own purposes.
  598        *
  599        * @since 1.2
  600        */
  601       public static class Subset  {
  602   
                /**
                 * Name of this subset. Assigned by the constructor, which rejects
                 * <code>null</code>, and returned by {@link #toString()}.
                 */
  603           private String name;
  604   
  605           /**
  606            * Constructs a new <code>Subset</code> instance.
  607            *
  608            * @param  name  The name of this subset
  609            * @exception NullPointerException if name is <code>null</code>
  610            */
  611           protected Subset(String name) {
                    // Fail fast; the exception message is the parameter name.
  612               if (name == null) {
  613                   throw new NullPointerException("name");
  614               }
  615               this.name = name;
  616           }
  617   
  618           /**
  619            * Compares two <code>Subset</code> objects for equality.
  620            * This method returns <code>true</code> if and only if
  621            * <code>this</code> and the argument refer to the same
  622            * object; since this method is <code>final</code>, this
  623            * guarantee holds for all subclasses.
                 *
                 * @param  obj  the reference to compare with this subset
                 * @return <code>true</code> if <code>obj</code> is this very
                 *         instance; <code>false</code> otherwise
  624            */
  625           public final boolean equals(Object obj) {
  626               return (this == obj);
  627           }
  628   
  629           /**
  630            * Returns the standard hash code as defined by the
  631            * <code>{@link Object#hashCode}</code> method.  This method
  632            * is <code>final</code> in order to ensure that the
  633            * <code>equals</code> and <code>hashCode</code> methods will
  634            * be consistent in all subclasses.
                 *
                 * @return the value produced by <code>super.hashCode()</code>
  635            */
  636           public final int hashCode() {
  637               return super.hashCode();
  638           }
  639   
  640           /**
  641            * Returns the name of this subset.
                 *
                 * @return the current value of the <code>name</code> field
  642            */
  643           public final String toString() {
  644               return name;
  645           }
  646       }
  647   
  648       /**
  649        * A family of character subsets representing the character blocks in the
  650        * Unicode specification. Character blocks generally define characters
  651        * used for a specific script or purpose. A character is contained by
  652        * at most one Unicode block.
  653        *
  654        * @since 1.2
  655        */
  656       public static final class UnicodeBlock extends Subset {
  657   
  658           private static Map map = new HashMap();
  659   
  660           /**
  661            * Create a UnicodeBlock with the given identifier name.
  662            * This name must be the same as the block identifier.
                 * <p>
                 * The new block is also stored in <code>map</code>, keyed by
                 * <code>idName</code> upper-cased with <code>Locale.US</code>.
                 * NOTE(review): the upper-casing is presumably there so that name
                 * lookups can be case-insensitive; the lookup code is outside this
                 * excerpt, so confirm against it.
                 *
                 * @param idName the identifier name; passed to the
                 *        <code>Subset</code> constructor, which throws
                 *        <code>NullPointerException</code> if it is <code>null</code>
  663            */
  664           private UnicodeBlock(String idName) {
  665               super(idName);
  666               map.put(idName.toUpperCase(Locale.US), this);
  667           }
  668   
  669           /**
  670            * Create a UnicodeBlock with the given identifier name and
  671            * alias name.
                 * <p>
                 * Delegates to the single-argument constructor, then adds a second
                 * entry to <code>map</code> keyed by <code>alias</code> upper-cased
                 * with <code>Locale.US</code>, so both keys map to this block.
                 *
                 * @param idName the identifier name of the block
                 * @param alias  an additional name for the block; a <code>null</code>
                 *        value causes a <code>NullPointerException</code> when it is
                 *        upper-cased
  672            */
  673           private UnicodeBlock(String idName, String alias) {
  674               this(idName);
  675               map.put(alias.toUpperCase(Locale.US), this);
  676           }
  677   
  678           /**
  679            * Create a UnicodeBlock with the given identifier name and
  680            * alias names.
                 * <p>
                 * Delegates to the single-argument constructor, then adds one entry
                 * to <code>map</code> per alias, each keyed by the alias upper-cased
                 * with <code>Locale.US</code>.
                 *
                 * @param idName    the identifier name of the block
                 * @param aliasName additional names for the block; a <code>null</code>
                 *        array is accepted and registers no aliases, but a
                 *        <code>null</code> element causes a
                 *        <code>NullPointerException</code> when it is upper-cased
  681            */
  682           private UnicodeBlock(String idName, String[] aliasName) {
  683               this(idName);
                    // A null alias array simply means "no aliases".
  684               if (aliasName != null) {
  685                   for(int x=0; x<aliasName.length; ++x) {
  686                       map.put(aliasName[x].toUpperCase(Locale.US), this);
  687                   }
  688               }
  689           }
  690   
  691           /**
  692            * Constant for the "Basic Latin" Unicode character block.
  693            * @since 1.2
  694            */
  695           public static final UnicodeBlock  BASIC_LATIN =
  696               new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
  697   
  698           /**
  699            * Constant for the "Latin-1 Supplement" Unicode character block.
  700            * @since 1.2
  701            */
  702           public static final UnicodeBlock LATIN_1_SUPPLEMENT =
  703               new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
  704   
  705           /**
  706            * Constant for the "Latin Extended-A" Unicode character block.
  707            * @since 1.2
  708            */
  709           public static final UnicodeBlock LATIN_EXTENDED_A =
  710               new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
  711   
  712           /**
  713            * Constant for the "Latin Extended-B" Unicode character block.
  714            * @since 1.2
  715            */
  716           public static final UnicodeBlock LATIN_EXTENDED_B =
  717               new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
  718   
  719           /**
  720            * Constant for the "IPA Extensions" Unicode character block.
  721            * @since 1.2
  722            */
  723           public static final UnicodeBlock IPA_EXTENSIONS =
  724               new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
  725   
  726           /**
  727            * Constant for the "Spacing Modifier Letters" Unicode character block.
  728            * @since 1.2
  729            */
  730           public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
  731               new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
  732                                                                           "SpacingModifierLetters"});
  733   
  734           /**
  735            * Constant for the "Combining Diacritical Marks" Unicode character block.
  736            * @since 1.2
  737            */
  738           public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
  739               new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
  740                                                                             "CombiningDiacriticalMarks" });
  741   
  742           /**
  743            * Constant for the "Greek and Coptic" Unicode character block.
  744            * <p>
  745            * This block was previously known as the "Greek" block.
  746            *
  747            * @since 1.2
  748            */
  749           public static final UnicodeBlock GREEK
  750               = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
  751   
  752           /**
  753            * Constant for the "Cyrillic" Unicode character block.
  754            * @since 1.2
  755            */
  756           public static final UnicodeBlock CYRILLIC =
  757               new UnicodeBlock("CYRILLIC");
  758   
  759           /**
  760            * Constant for the "Armenian" Unicode character block.
  761            * @since 1.2
  762            */
  763           public static final UnicodeBlock ARMENIAN =
  764               new UnicodeBlock("ARMENIAN");
  765   
  766           /**
  767            * Constant for the "Hebrew" Unicode character block.
  768            * @since 1.2
  769            */
  770           public static final UnicodeBlock HEBREW =
  771               new UnicodeBlock("HEBREW");
  772   
  773           /**
  774            * Constant for the "Arabic" Unicode character block.
  775            * @since 1.2
  776            */
  777           public static final UnicodeBlock ARABIC =
  778               new UnicodeBlock("ARABIC");
  779   
  780           /**
  781            * Constant for the "Devanagari" Unicode character block.
  782            * @since 1.2
  783            */
  784           public static final UnicodeBlock DEVANAGARI =
  785               new UnicodeBlock("DEVANAGARI");
  786   
  787           /**
  788            * Constant for the "Bengali" Unicode character block.
  789            * @since 1.2
  790            */
  791           public static final UnicodeBlock BENGALI =
  792               new UnicodeBlock("BENGALI");
  793   
  794           /**
  795            * Constant for the "Gurmukhi" Unicode character block.
  796            * @since 1.2
  797            */
  798           public static final UnicodeBlock GURMUKHI =
  799               new UnicodeBlock("GURMUKHI");
  800   
  801           /**
  802            * Constant for the "Gujarati" Unicode character block.
  803            * @since 1.2
  804            */
  805           public static final UnicodeBlock GUJARATI =
  806               new UnicodeBlock("GUJARATI");
  807   
  808           /**
  809            * Constant for the "Oriya" Unicode character block.
  810            * @since 1.2
  811            */
  812           public static final UnicodeBlock ORIYA =
  813               new UnicodeBlock("ORIYA");
  814   
  815           /**
  816            * Constant for the "Tamil" Unicode character block.
  817            * @since 1.2
  818            */
  819           public static final UnicodeBlock TAMIL =
  820               new UnicodeBlock("TAMIL");
  821   
  822           /**
  823            * Constant for the "Telugu" Unicode character block.
  824            * @since 1.2
  825            */
  826           public static final UnicodeBlock TELUGU =
  827               new UnicodeBlock("TELUGU");
  828   
  829           /**
  830            * Constant for the "Kannada" Unicode character block.
  831            * @since 1.2
  832            */
  833           public static final UnicodeBlock KANNADA =
  834               new UnicodeBlock("KANNADA");
  835   
  836           /**
  837            * Constant for the "Malayalam" Unicode character block.
  838            * @since 1.2
  839            */
  840           public static final UnicodeBlock MALAYALAM =
  841               new UnicodeBlock("MALAYALAM");
  842   
  843           /**
  844            * Constant for the "Thai" Unicode character block.
  845            * @since 1.2
  846            */
  847           public static final UnicodeBlock THAI =
  848               new UnicodeBlock("THAI");
  849   
  850           /**
  851            * Constant for the "Lao" Unicode character block.
  852            * @since 1.2
  853            */
  854           public static final UnicodeBlock LAO =
  855               new UnicodeBlock("LAO");
  856   
  857           /**
  858            * Constant for the "Tibetan" Unicode character block.
  859            * @since 1.2
  860            */
  861           public static final UnicodeBlock TIBETAN =
  862               new UnicodeBlock("TIBETAN");
  863   
  864           /**
  865            * Constant for the "Georgian" Unicode character block.
  866            * @since 1.2
  867            */
  868           public static final UnicodeBlock GEORGIAN =
  869               new UnicodeBlock("GEORGIAN");
  870   
  871           /**
  872            * Constant for the "Hangul Jamo" Unicode character block.
  873            * @since 1.2
  874            */
  875           public static final UnicodeBlock HANGUL_JAMO =
  876               new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
  877   
  878           /**
  879            * Constant for the "Latin Extended Additional" Unicode character block.
  880            * @since 1.2
  881            */
  882           public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
  883               new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
  884                                                                           "LatinExtendedAdditional"});
  885   
  886           /**
  887            * Constant for the "Greek Extended" Unicode character block.
  888            * @since 1.2
  889            */
  890           public static final UnicodeBlock GREEK_EXTENDED =
  891               new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
  892   
  893           /**
  894            * Constant for the "General Punctuation" Unicode character block.
  895            * @since 1.2
  896            */
  897           public static final UnicodeBlock GENERAL_PUNCTUATION =
  898               new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
  899   
  900           /**
  901            * Constant for the "Superscripts and Subscripts" Unicode character block.
  902            * @since 1.2
  903            */
  904           public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
  905               new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
  906                                                                             "SuperscriptsandSubscripts" });
  907   
  908           /**
  909            * Constant for the "Currency Symbols" Unicode character block.
  910            * @since 1.2
  911            */
  912           public static final UnicodeBlock CURRENCY_SYMBOLS =
  913               new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
  914   
  915           /**
  916            * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
  917            * <p>
  918            * This block was previously known as "Combining Marks for Symbols".
  919            * @since 1.2
  920            */
  921           public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
  922               new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
  923                                                                                                                                                     "CombiningDiacriticalMarksforSymbols",
  924                                                                             "Combining Marks for Symbols",
  925                                                                             "CombiningMarksforSymbols" });
  926   
  927           /**
  928            * Constant for the "Letterlike Symbols" Unicode character block.
  929            * @since 1.2
  930            */
  931           public static final UnicodeBlock LETTERLIKE_SYMBOLS =
  932               new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
  933   
  934           /**
  935            * Constant for the "Number Forms" Unicode character block.
  936            * @since 1.2
  937            */
  938           public static final UnicodeBlock NUMBER_FORMS =
  939               new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
  940   
  941           /**
  942            * Constant for the "Arrows" Unicode character block.
  943            * @since 1.2
  944            */
  945           public static final UnicodeBlock ARROWS =
  946               new UnicodeBlock("ARROWS");
  947   
  948           /**
  949            * Constant for the "Mathematical Operators" Unicode character block.
  950            * @since 1.2
  951            */
  952           public static final UnicodeBlock MATHEMATICAL_OPERATORS =
  953               new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
  954                                                                        "MathematicalOperators"});
  955   
  956           /**
  957            * Constant for the "Miscellaneous Technical" Unicode character block.
  958            * @since 1.2
  959            */
  960           public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
  961               new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
  962                                                                         "MiscellaneousTechnical"});
  963   
  964           /**
  965            * Constant for the "Control Pictures" Unicode character block.
  966            * @since 1.2
  967            */
  968           public static final UnicodeBlock CONTROL_PICTURES =
  969               new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
  970   
  971           /**
  972            * Constant for the "Optical Character Recognition" Unicode character block.
  973            * @since 1.2
  974            */
  975           public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
  976               new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
  977                                                                               "OpticalCharacterRecognition"});
  978   
  979           /**
  980            * Constant for the "Enclosed Alphanumerics" Unicode character block.
  981            * @since 1.2
  982            */
  983           public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
  984               new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
  985                                                                        "EnclosedAlphanumerics"});
  986   
  987           /**
  988            * Constant for the "Box Drawing" Unicode character block.
  989            * @since 1.2
  990            */
  991           public static final UnicodeBlock BOX_DRAWING =
  992               new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
  993   
  994           /**
  995            * Constant for the "Block Elements" Unicode character block.
  996            * @since 1.2
  997            */
  998           public static final UnicodeBlock BLOCK_ELEMENTS =
  999               new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
 1000   
 1001           /**
 1002            * Constant for the "Geometric Shapes" Unicode character block.
 1003            * @since 1.2
 1004            */
 1005           public static final UnicodeBlock GEOMETRIC_SHAPES =
 1006               new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
 1007   
 1008           /**
 1009            * Constant for the "Miscellaneous Symbols" Unicode character block.
 1010            * @since 1.2
 1011            */
 1012           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
 1013               new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
 1014                                                                       "MiscellaneousSymbols"});
 1015   
 1016           /**
 1017            * Constant for the "Dingbats" Unicode character block.
 1018            * @since 1.2
 1019            */
 1020           public static final UnicodeBlock DINGBATS =
 1021               new UnicodeBlock("DINGBATS");
 1022   
 1023           /**
 1024            * Constant for the "CJK Symbols and Punctuation" Unicode character block.
 1025            * @since 1.2
 1026            */
 1027           public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
 1028               new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
 1029                                                                             "CJKSymbolsandPunctuation"});
 1030   
 1031           /**
 1032            * Constant for the "Hiragana" Unicode character block.
 1033            * @since 1.2
 1034            */
 1035           public static final UnicodeBlock HIRAGANA =
 1036               new UnicodeBlock("HIRAGANA");
 1037   
 1038           /**
 1039            * Constant for the "Katakana" Unicode character block.
 1040            * @since 1.2
 1041            */
 1042           public static final UnicodeBlock KATAKANA =
 1043               new UnicodeBlock("KATAKANA");
 1044   
 1045           /**
 1046            * Constant for the "Bopomofo" Unicode character block.
 1047            * @since 1.2
 1048            */
 1049           public static final UnicodeBlock BOPOMOFO =
 1050               new UnicodeBlock("BOPOMOFO");
 1051   
 1052           /**
 1053            * Constant for the "Hangul Compatibility Jamo" Unicode character block.
 1054            * @since 1.2
 1055            */
 1056           public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
 1057               new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
 1058                                                                           "HangulCompatibilityJamo"});
 1059   
 1060           /**
 1061            * Constant for the "Kanbun" Unicode character block.
 1062            * @since 1.2
 1063            */
 1064           public static final UnicodeBlock KANBUN =
 1065               new UnicodeBlock("KANBUN");
 1066   
 1067           /**
 1068            * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
 1069            * @since 1.2
 1070            */
 1071           public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
 1072               new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
 1073                                                                                 "EnclosedCJKLettersandMonths"});
 1074   
 1075           /**
 1076            * Constant for the "CJK Compatibility" Unicode character block.
 1077            * @since 1.2
 1078            */
 1079           public static final UnicodeBlock CJK_COMPATIBILITY =
 1080               new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
 1081   
 1082           /**
 1083            * Constant for the "CJK Unified Ideographs" Unicode character block.
 1084            * @since 1.2
 1085            */
 1086           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
 1087               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
 1088                                                                        "CJKUnifiedIdeographs"});
 1089   
 1090           /**
 1091            * Constant for the "Hangul Syllables" Unicode character block.
 1092            * @since 1.2
 1093            */
 1094           public static final UnicodeBlock HANGUL_SYLLABLES =
 1095               new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
 1096   
 1097           /**
 1098            * Constant for the "Private Use Area" Unicode character block.
 1099            * @since 1.2
 1100            */
 1101           public static final UnicodeBlock PRIVATE_USE_AREA =
 1102               new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
 1103   
 1104           /**
 1105            * Constant for the "CJK Compatibility Ideographs" Unicode character block.
 1106            * @since 1.2
 1107            */
 1108           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
 1109               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
 1110                                new String[] {"CJK Compatibility Ideographs",
 1111                                              "CJKCompatibilityIdeographs"});
 1112   
 1113           /**
 1114            * Constant for the "Alphabetic Presentation Forms" Unicode character block.
 1115            * @since 1.2
 1116            */
 1117           public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
 1118               new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
 1119                                                                               "AlphabeticPresentationForms"});
 1120   
 1121           /**
 1122            * Constant for the "Arabic Presentation Forms-A" Unicode character block.
 1123            * @since 1.2
 1124            */
 1125           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
 1126               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
 1127                                                                             "ArabicPresentationForms-A"});
 1128   
 1129           /**
 1130            * Constant for the "Combining Half Marks" Unicode character block.
 1131            * @since 1.2
 1132            */
 1133           public static final UnicodeBlock COMBINING_HALF_MARKS =
 1134               new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
 1135                                                                      "CombiningHalfMarks"});
 1136   
 1137           /**
 1138            * Constant for the "CJK Compatibility Forms" Unicode character block.
 1139            * @since 1.2
 1140            */
 1141           public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
 1142               new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
 1143                                                                         "CJKCompatibilityForms"});
 1144   
 1145           /**
 1146            * Constant for the "Small Form Variants" Unicode character block.
 1147            * @since 1.2
 1148            */
 1149           public static final UnicodeBlock SMALL_FORM_VARIANTS =
 1150               new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
 1151                                                                     "SmallFormVariants"});
 1152   
 1153           /**
 1154            * Constant for the "Arabic Presentation Forms-B" Unicode character block.
 1155            * @since 1.2
 1156            */
 1157           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
 1158               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
 1159                                                                             "ArabicPresentationForms-B"});
 1160   
 1161           /**
 1162            * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
 1163            * @since 1.2
 1164            */
 1165           public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
 1166               new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
 1167                                new String[] {"Halfwidth and Fullwidth Forms",
 1168                                              "HalfwidthandFullwidthForms"});
 1169   
 1170           /**
 1171            * Constant for the "Specials" Unicode character block.
 1172            * @since 1.2
 1173            */
 1174           public static final UnicodeBlock SPECIALS =
 1175               new UnicodeBlock("SPECIALS");
 1176   
 1177           /**
 1178            * Legacy constant for the block identifier "SURROGATES_AREA".
 1179            * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
 1180            *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
 1181            *             These new constants match the block definitions of the
 1182            *             Unicode Standard. The {@link #of(char)} and {@link #of(int)}
 1183            *             methods return the new constants, not SURROGATES_AREA.
 1184            */
 1185           @Deprecated
 1186           public static final UnicodeBlock SURROGATES_AREA =
 1187               new UnicodeBlock("SURROGATES_AREA");
 1188   
 1189           /**
 1190            * Constant for the "Syriac" Unicode character block.
 1191            * @since 1.4
 1192            */
 1193           public static final UnicodeBlock SYRIAC =
 1194               new UnicodeBlock("SYRIAC");
 1195   
 1196           /**
 1197            * Constant for the "Thaana" Unicode character block.
 1198            * @since 1.4
 1199            */
 1200           public static final UnicodeBlock THAANA =
 1201               new UnicodeBlock("THAANA");
 1202   
 1203           /**
 1204            * Constant for the "Sinhala" Unicode character block.
 1205            * @since 1.4
 1206            */
 1207           public static final UnicodeBlock SINHALA =
 1208               new UnicodeBlock("SINHALA");
 1209   
 1210           /**
 1211            * Constant for the "Myanmar" Unicode character block.
 1212            * @since 1.4
 1213            */
 1214           public static final UnicodeBlock MYANMAR =
 1215               new UnicodeBlock("MYANMAR");
 1216   
 1217           /**
 1218            * Constant for the "Ethiopic" Unicode character block.
 1219            * @since 1.4
 1220            */
 1221           public static final UnicodeBlock ETHIOPIC =
 1222               new UnicodeBlock("ETHIOPIC");
 1223   
 1224           /**
 1225            * Constant for the "Cherokee" Unicode character block.
 1226            * @since 1.4
 1227            */
 1228           public static final UnicodeBlock CHEROKEE =
 1229               new UnicodeBlock("CHEROKEE");
 1230   
 1231           /**
 1232            * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
 1233            * @since 1.4
 1234            */
 1235           public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
 1236               new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
 1237                                new String[] {"Unified Canadian Aboriginal Syllabics",
 1238                                              "UnifiedCanadianAboriginalSyllabics"});
 1239   
 1240           /**
 1241            * Constant for the "Ogham" Unicode character block.
 1242            * @since 1.4
 1243            */
 1244           public static final UnicodeBlock OGHAM =
 1245                                new UnicodeBlock("OGHAM");
 1246   
 1247           /**
 1248            * Constant for the "Runic" Unicode character block.
 1249            * @since 1.4
 1250            */
 1251           public static final UnicodeBlock RUNIC =
 1252                                new UnicodeBlock("RUNIC");
 1253   
 1254           /**
 1255            * Constant for the "Khmer" Unicode character block.
 1256            * @since 1.4
 1257            */
 1258           public static final UnicodeBlock KHMER =
 1259                                new UnicodeBlock("KHMER");
 1260   
 1261           /**
 1262            * Constant for the "Mongolian" Unicode character block.
 1263            * @since 1.4
 1264            */
 1265           public static final UnicodeBlock MONGOLIAN =
 1266                                new UnicodeBlock("MONGOLIAN");
 1267   
 1268           /**
 1269            * Constant for the "Braille Patterns" Unicode character block.
 1270            * @since 1.4
 1271            */
 1272           public static final UnicodeBlock BRAILLE_PATTERNS =
 1273               new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
 1274                                                                  "BraillePatterns"});
 1275   
 1276           /**
 1277            * Constant for the "CJK Radicals Supplement" Unicode character block.
 1278            * @since 1.4
 1279            */
 1280           public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
 1281                new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
 1282                                                                          "CJKRadicalsSupplement"});
 1283   
 1284           /**
 1285            * Constant for the "Kangxi Radicals" Unicode character block.
 1286            * @since 1.4
 1287            */
 1288           public static final UnicodeBlock KANGXI_RADICALS =
 1289               new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
 1290   
 1291           /**
 1292            * Constant for the "Ideographic Description Characters" Unicode character block.
 1293            * @since 1.4
 1294            */
 1295           public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
 1296               new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
 1297                                                                                    "IdeographicDescriptionCharacters"});
 1298   
 1299           /**
 1300            * Constant for the "Bopomofo Extended" Unicode character block.
 1301            * @since 1.4
 1302            */
 1303           public static final UnicodeBlock BOPOMOFO_EXTENDED =
 1304               new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
 1305                                                                   "BopomofoExtended"});
 1306   
 1307           /**
 1308            * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
 1309            * @since 1.4
 1310            */
 1311           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
 1312               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
 1313                                                                                    "CJKUnifiedIdeographsExtensionA"});
 1314   
 1315           /**
 1316            * Constant for the "Yi Syllables" Unicode character block.
 1317            * @since 1.4
 1318            */
 1319           public static final UnicodeBlock YI_SYLLABLES =
 1320               new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
 1321   
 1322           /**
 1323            * Constant for the "Yi Radicals" Unicode character block.
 1324            * @since 1.4
 1325            */
 1326           public static final UnicodeBlock YI_RADICALS =
 1327               new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
 1328   
 1329   
 1330           /**
 1331            * Constant for the "Cyrillic Supplementary" Unicode character block.
 1332            * @since 1.5
 1333            */
 1334           public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
 1335               new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", new String[] {"Cyrillic Supplementary",
 1336                                                                        "CyrillicSupplementary"});
 1337   
 1338           /**
 1339            * Constant for the "Tagalog" Unicode character block.
 1340            * @since 1.5
 1341            */
 1342           public static final UnicodeBlock TAGALOG =
 1343               new UnicodeBlock("TAGALOG");
 1344   
 1345           /**
 1346            * Constant for the "Hanunoo" Unicode character block.
 1347            * @since 1.5
 1348            */
 1349           public static final UnicodeBlock HANUNOO =
 1350               new UnicodeBlock("HANUNOO");
 1351   
 1352           /**
 1353            * Constant for the "Buhid" Unicode character block.
 1354            * @since 1.5
 1355            */
 1356           public static final UnicodeBlock BUHID =
 1357               new UnicodeBlock("BUHID");
 1358   
 1359           /**
 1360            * Constant for the "Tagbanwa" Unicode character block.
 1361            * @since 1.5
 1362            */
 1363           public static final UnicodeBlock TAGBANWA =
 1364               new UnicodeBlock("TAGBANWA");
 1365   
 1366           /**
 1367            * Constant for the "Limbu" Unicode character block.
 1368            * @since 1.5
 1369            */
 1370           public static final UnicodeBlock LIMBU =
 1371               new UnicodeBlock("LIMBU");
 1372   
 1373           /**
 1374            * Constant for the "Tai Le" Unicode character block.
 1375            * @since 1.5
 1376            */
 1377           public static final UnicodeBlock TAI_LE =
 1378               new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
 1379   
 1380           /**
 1381            * Constant for the "Khmer Symbols" Unicode character block.
 1382            * @since 1.5
 1383            */
 1384           public static final UnicodeBlock KHMER_SYMBOLS =
 1385               new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
 1386   
 1387           /**
 1388            * Constant for the "Phonetic Extensions" Unicode character block.
 1389            * @since 1.5
 1390            */
 1391           public static final UnicodeBlock PHONETIC_EXTENSIONS =
 1392               new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
 1393   
 1394           /**
 1395            * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
 1396            * @since 1.5
 1397            */
 1398           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
 1399               new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
 1400                                new String[]{"Miscellaneous Mathematical Symbols-A",
 1401                                             "MiscellaneousMathematicalSymbols-A"});
 1402   
 1403           /**
 1404            * Constant for the "Supplemental Arrows-A" Unicode character block.
 1405            * @since 1.5
 1406            */
 1407           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
 1408               new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
 1409                                                                       "SupplementalArrows-A"});
 1410   
 1411           /**
 1412            * Constant for the "Supplemental Arrows-B" Unicode character block.
 1413            * @since 1.5
 1414            */
 1415           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
 1416               new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
 1417                                                                       "SupplementalArrows-B"});
 1418   
 1419           /**
 1420            * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
 1421            * @since 1.5
 1422            */
 1423           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 1424                   = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
 1425                                      new String[] {"Miscellaneous Mathematical Symbols-B",
 1426                                                    "MiscellaneousMathematicalSymbols-B"});
 1427   
 1428           /**
 1429            * Constant for the "Supplemental Mathematical Operators" Unicode character block.
 1430            * @since 1.5
 1431            */
 1432           public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
 1433               new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
 1434                                new String[]{"Supplemental Mathematical Operators",
 1435                                             "SupplementalMathematicalOperators"} );
 1436   
 1437           /**
 1438            * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
 1439            * @since 1.5
 1440            */
 1441           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
 1442               new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
 1443                                                                                  "MiscellaneousSymbolsandArrows"});
 1444   
 1445           /**
 1446            * Constant for the "Katakana Phonetic Extensions" Unicode character block.
 1447            * @since 1.5
 1448            */
 1449           public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
 1450               new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
 1451                                                                              "KatakanaPhoneticExtensions"});
 1452   
 1453           /**
 1454            * Constant for the "Yijing Hexagram Symbols" Unicode character block.
 1455            * @since 1.5
 1456            */
 1457           public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
 1458               new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
 1459                                                                         "YijingHexagramSymbols"});
 1460   
 1461           /**
 1462            * Constant for the "Variation Selectors" Unicode character block.
 1463            * @since 1.5
 1464            */
 1465           public static final UnicodeBlock VARIATION_SELECTORS =
 1466               new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
 1467   
 1468           /**
 1469            * Constant for the "Linear B Syllabary" Unicode character block.
 1470            * @since 1.5
 1471            */
 1472           public static final UnicodeBlock LINEAR_B_SYLLABARY =
 1473               new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
 1474   
 1475           /**
 1476            * Constant for the "Linear B Ideograms" Unicode character block.
 1477            * @since 1.5
 1478            */
 1479           public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
 1480               new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
 1481   
 1482           /**
 1483            * Constant for the "Aegean Numbers" Unicode character block.
 1484            * @since 1.5
 1485            */
 1486           public static final UnicodeBlock AEGEAN_NUMBERS =
 1487               new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
 1488   
 1489           /**
 1490            * Constant for the "Old Italic" Unicode character block.
 1491            * @since 1.5
 1492            */
 1493           public static final UnicodeBlock OLD_ITALIC =
 1494               new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
 1495   
 1496           /**
 1497            * Constant for the "Gothic" Unicode character block.
 1498            * @since 1.5
 1499            */
 1500           public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
 1501   
 1502           /**
 1503            * Constant for the "Ugaritic" Unicode character block.
 1504            * @since 1.5
 1505            */
 1506           public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
 1507   
 1508           /**
 1509            * Constant for the "Deseret" Unicode character block.
 1510            * @since 1.5
 1511            */
 1512           public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
 1513   
 1514           /**
 1515            * Constant for the "Shavian" Unicode character block.
 1516            * @since 1.5
 1517            */
 1518           public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
 1519   
 1520           /**
 1521            * Constant for the "Osmanya" Unicode character block.
 1522            * @since 1.5
 1523            */
 1524           public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
 1525   
 1526           /**
 1527            * Constant for the "Cypriot Syllabary" Unicode character block.
 1528            * @since 1.5
 1529            */
 1530           public static final UnicodeBlock CYPRIOT_SYLLABARY =
 1531               new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
 1532   
 1533           /**
 1534            * Constant for the "Byzantine Musical Symbols" Unicode character block.
 1535            * @since 1.5
 1536            */
 1537           public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
 1538               new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
 1539                                                                           "ByzantineMusicalSymbols"});
 1540   
 1541           /**
 1542            * Constant for the "Musical Symbols" Unicode character block.
 1543            * @since 1.5
 1544            */
 1545           public static final UnicodeBlock MUSICAL_SYMBOLS =
 1546               new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
 1547   
 1548           /**
 1549            * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
 1550            * @since 1.5
 1551            */
 1552           public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
 1553               new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
 1554                                                                        "TaiXuanJingSymbols"});
 1555   
 1556           /**
 1557            * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block.
 1558            * @since 1.5
 1559            */
 1560           public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
 1561               new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
 1562                                new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
 1563   
 1564           /**
 1565            * Constant for the "CJK Unified Ideographs Extension B" Unicode character block.
 1566            * @since 1.5
 1567            */
 1568           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
 1569               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
 1570                                new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
 1571   
 1572           /**
 1573            * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
 1574            * @since 1.5
 1575            */
 1576           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
 1577               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
 1578                                new String[]{"CJK Compatibility Ideographs Supplement",
 1579                                             "CJKCompatibilityIdeographsSupplement"});
 1580   
 1581           /**
 1582            * Constant for the "Tags" Unicode character block.
 1583            * @since 1.5
 1584            */
 1585           public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
 1586   
 1587           /**
 1588            * Constant for the "Variation Selectors Supplement" Unicode character block.
 1589            * @since 1.5
 1590            */
 1591           public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
 1592               new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
 1593                                                                                "VariationSelectorsSupplement"});
 1594   
 1595           /**
 1596            * Constant for the "Supplementary Private Use Area-A" Unicode character block.
 1597            * @since 1.5
 1598            */
 1599           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
 1600               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
 1601                                new String[] {"Supplementary Private Use Area-A",
 1602                                              "SupplementaryPrivateUseArea-A"});
 1603   
 1604           /**
 1605            * Constant for the "Supplementary Private Use Area-B" Unicode character block.
 1606            * @since 1.5
 1607            */
 1608           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
 1609               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
 1610                                new String[] {"Supplementary Private Use Area-B",
 1611                                              "SupplementaryPrivateUseArea-B"});
 1612   
 1613           /**
 1614            * Constant for the "High Surrogates" Unicode character block.
 1615            * This block represents codepoint values in the high surrogate
 1616            * range: 0xD800 through 0xDB7F
 1617            *
 1618            * @since 1.5
 1619            */
 1620           public static final UnicodeBlock HIGH_SURROGATES =
 1621               new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
 1622   
 1623           /**
 1624            * Constant for the "High Private Use Surrogates" Unicode character block.
 1625            * This block represents codepoint values in the high surrogate
 1626            * range: 0xDB80 through 0xDBFF
 1627            *
 1628            * @since 1.5
 1629            */
 1630           public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
 1631               new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
 1632                                                                              "HighPrivateUseSurrogates"});
 1633   
 1634           /**
 1635            * Constant for the "Low Surrogates" Unicode character block.
 1636            * This block represents codepoint values in the high surrogate
 1637            * range: 0xDC00 through 0xDFFF
 1638            *
 1639            * @since 1.5
 1640            */
 1641           public static final UnicodeBlock LOW_SURROGATES =
 1642               new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
 1643   
 1644           private static final int blockStarts[] = {
 1645               0x0000, // Basic Latin
 1646               0x0080, // Latin-1 Supplement
 1647               0x0100, // Latin Extended-A
 1648               0x0180, // Latin Extended-B
 1649               0x0250, // IPA Extensions
 1650               0x02B0, // Spacing Modifier Letters
 1651               0x0300, // Combining Diacritical Marks
 1652               0x0370, // Greek and Coptic
 1653               0x0400, // Cyrillic
 1654               0x0500, // Cyrillic Supplementary
 1655               0x0530, // Armenian
 1656               0x0590, // Hebrew
 1657               0x0600, // Arabic
 1658               0x0700, // Syriac
 1659               0x0750, // unassigned
 1660               0x0780, // Thaana
 1661               0x07C0, // unassigned
 1662               0x0900, // Devanagari
 1663               0x0980, // Bengali
 1664               0x0A00, // Gurmukhi
 1665               0x0A80, // Gujarati
 1666               0x0B00, // Oriya
 1667               0x0B80, // Tamil
 1668               0x0C00, // Telugu
 1669               0x0C80, // Kannada
 1670               0x0D00, // Malayalam
 1671               0x0D80, // Sinhala
 1672               0x0E00, // Thai
 1673               0x0E80, // Lao
 1674               0x0F00, // Tibetan
 1675               0x1000, // Myanmar
 1676               0x10A0, // Georgian
 1677               0x1100, // Hangul Jamo
 1678               0x1200, // Ethiopic
 1679               0x1380, // unassigned
 1680               0x13A0, // Cherokee
 1681               0x1400, // Unified Canadian Aboriginal Syllabics
 1682               0x1680, // Ogham
 1683               0x16A0, // Runic
 1684               0x1700, // Tagalog
 1685               0x1720, // Hanunoo
 1686               0x1740, // Buhid
 1687               0x1760, // Tagbanwa
 1688               0x1780, // Khmer
 1689               0x1800, // Mongolian
 1690               0x18B0, // unassigned
 1691               0x1900, // Limbu
 1692               0x1950, // Tai Le
 1693               0x1980, // unassigned
 1694               0x19E0, // Khmer Symbols
 1695               0x1A00, // unassigned
 1696               0x1D00, // Phonetic Extensions
 1697               0x1D80, // unassigned
 1698               0x1E00, // Latin Extended Additional
 1699               0x1F00, // Greek Extended
 1700               0x2000, // General Punctuation
 1701               0x2070, // Superscripts and Subscripts
 1702               0x20A0, // Currency Symbols
 1703               0x20D0, // Combining Diacritical Marks for Symbols
 1704               0x2100, // Letterlike Symbols
 1705               0x2150, // Number Forms
 1706               0x2190, // Arrows
 1707               0x2200, // Mathematical Operators
 1708               0x2300, // Miscellaneous Technical
 1709               0x2400, // Control Pictures
 1710               0x2440, // Optical Character Recognition
 1711               0x2460, // Enclosed Alphanumerics
 1712               0x2500, // Box Drawing
 1713               0x2580, // Block Elements
 1714               0x25A0, // Geometric Shapes
 1715               0x2600, // Miscellaneous Symbols
 1716               0x2700, // Dingbats
 1717               0x27C0, // Miscellaneous Mathematical Symbols-A
 1718               0x27F0, // Supplemental Arrows-A
 1719               0x2800, // Braille Patterns
 1720               0x2900, // Supplemental Arrows-B
 1721               0x2980, // Miscellaneous Mathematical Symbols-B
 1722               0x2A00, // Supplemental Mathematical Operators
 1723               0x2B00, // Miscellaneous Symbols and Arrows
 1724               0x2C00, // unassigned
 1725               0x2E80, // CJK Radicals Supplement
 1726               0x2F00, // Kangxi Radicals
 1727               0x2FE0, // unassigned
 1728               0x2FF0, // Ideographic Description Characters
 1729               0x3000, // CJK Symbols and Punctuation
 1730               0x3040, // Hiragana
 1731               0x30A0, // Katakana
 1732               0x3100, // Bopomofo
 1733               0x3130, // Hangul Compatibility Jamo
 1734               0x3190, // Kanbun
 1735               0x31A0, // Bopomofo Extended
 1736               0x31C0, // unassigned
 1737               0x31F0, // Katakana Phonetic Extensions
 1738               0x3200, // Enclosed CJK Letters and Months
 1739               0x3300, // CJK Compatibility
 1740               0x3400, // CJK Unified Ideographs Extension A
 1741               0x4DC0, // Yijing Hexagram Symbols
 1742               0x4E00, // CJK Unified Ideographs
 1743               0xA000, // Yi Syllables
 1744               0xA490, // Yi Radicals
 1745               0xA4D0, // unassigned
 1746               0xAC00, // Hangul Syllables
 1747               0xD7B0, // unassigned
 1748               0xD800, // High Surrogates
 1749               0xDB80, // High Private Use Surrogates
 1750               0xDC00, // Low Surrogates
 1751               0xE000, // Private Use
 1752               0xF900, // CJK Compatibility Ideographs
 1753               0xFB00, // Alphabetic Presentation Forms
 1754               0xFB50, // Arabic Presentation Forms-A
 1755               0xFE00, // Variation Selectors
 1756               0xFE10, // unassigned
 1757               0xFE20, // Combining Half Marks
 1758               0xFE30, // CJK Compatibility Forms
 1759               0xFE50, // Small Form Variants
 1760               0xFE70, // Arabic Presentation Forms-B
 1761               0xFF00, // Halfwidth and Fullwidth Forms
 1762               0xFFF0, // Specials
 1763               0x10000, // Linear B Syllabary
 1764               0x10080, // Linear B Ideograms
 1765               0x10100, // Aegean Numbers
 1766               0x10140, // unassigned
 1767               0x10300, // Old Italic
 1768               0x10330, // Gothic
 1769               0x10350, // unassigned
 1770               0x10380, // Ugaritic
 1771               0x103A0, // unassigned
 1772               0x10400, // Deseret
 1773               0x10450, // Shavian
 1774               0x10480, // Osmanya
 1775               0x104B0, // unassigned
 1776               0x10800, // Cypriot Syllabary
 1777               0x10840, // unassigned
 1778               0x1D000, // Byzantine Musical Symbols
 1779               0x1D100, // Musical Symbols
 1780               0x1D200, // unassigned
 1781               0x1D300, // Tai Xuan Jing Symbols
 1782               0x1D360, // unassigned
 1783               0x1D400, // Mathematical Alphanumeric Symbols
 1784               0x1D800, // unassigned
 1785               0x20000, // CJK Unified Ideographs Extension B
 1786               0x2A6E0, // unassigned
 1787               0x2F800, // CJK Compatibility Ideographs Supplement
 1788               0x2FA20, // unassigned
 1789               0xE0000, // Tags
 1790               0xE0080, // unassigned
 1791               0xE0100, // Variation Selectors Supplement
 1792               0xE01F0, // unassigned
 1793               0xF0000, // Supplementary Private Use Area-A
 1794               0x100000, // Supplementary Private Use Area-B
 1795           };
 1796   
 1797           private static final UnicodeBlock[] blocks = {
 1798               BASIC_LATIN,
 1799               LATIN_1_SUPPLEMENT,
 1800               LATIN_EXTENDED_A,
 1801               LATIN_EXTENDED_B,
 1802               IPA_EXTENSIONS,
 1803               SPACING_MODIFIER_LETTERS,
 1804               COMBINING_DIACRITICAL_MARKS,
 1805               GREEK,
 1806               CYRILLIC,
 1807               CYRILLIC_SUPPLEMENTARY,
 1808               ARMENIAN,
 1809               HEBREW,
 1810               ARABIC,
 1811               SYRIAC,
 1812               null,
 1813               THAANA,
 1814               null,
 1815               DEVANAGARI,
 1816               BENGALI,
 1817               GURMUKHI,
 1818               GUJARATI,
 1819               ORIYA,
 1820               TAMIL,
 1821               TELUGU,
 1822               KANNADA,
 1823               MALAYALAM,
 1824               SINHALA,
 1825               THAI,
 1826               LAO,
 1827               TIBETAN,
 1828               MYANMAR,
 1829               GEORGIAN,
 1830               HANGUL_JAMO,
 1831               ETHIOPIC,
 1832               null,
 1833               CHEROKEE,
 1834               UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 1835               OGHAM,
 1836               RUNIC,
 1837               TAGALOG,
 1838               HANUNOO,
 1839               BUHID,
 1840               TAGBANWA,
 1841               KHMER,
 1842               MONGOLIAN,
 1843               null,
 1844               LIMBU,
 1845               TAI_LE,
 1846               null,
 1847               KHMER_SYMBOLS,
 1848               null,
 1849               PHONETIC_EXTENSIONS,
 1850               null,
 1851               LATIN_EXTENDED_ADDITIONAL,
 1852               GREEK_EXTENDED,
 1853               GENERAL_PUNCTUATION,
 1854               SUPERSCRIPTS_AND_SUBSCRIPTS,
 1855               CURRENCY_SYMBOLS,
 1856               COMBINING_MARKS_FOR_SYMBOLS,
 1857               LETTERLIKE_SYMBOLS,
 1858               NUMBER_FORMS,
 1859               ARROWS,
 1860               MATHEMATICAL_OPERATORS,
 1861               MISCELLANEOUS_TECHNICAL,
 1862               CONTROL_PICTURES,
 1863               OPTICAL_CHARACTER_RECOGNITION,
 1864               ENCLOSED_ALPHANUMERICS,
 1865               BOX_DRAWING,
 1866               BLOCK_ELEMENTS,
 1867               GEOMETRIC_SHAPES,
 1868               MISCELLANEOUS_SYMBOLS,
 1869               DINGBATS,
 1870               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
 1871               SUPPLEMENTAL_ARROWS_A,
 1872               BRAILLE_PATTERNS,
 1873               SUPPLEMENTAL_ARROWS_B,
 1874               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
 1875               SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
 1876               MISCELLANEOUS_SYMBOLS_AND_ARROWS,
 1877               null,
 1878               CJK_RADICALS_SUPPLEMENT,
 1879               KANGXI_RADICALS,
 1880               null,
 1881               IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 1882               CJK_SYMBOLS_AND_PUNCTUATION,
 1883               HIRAGANA,
 1884               KATAKANA,
 1885               BOPOMOFO,
 1886               HANGUL_COMPATIBILITY_JAMO,
 1887               KANBUN,
 1888               BOPOMOFO_EXTENDED,
 1889               null,
 1890               KATAKANA_PHONETIC_EXTENSIONS,
 1891               ENCLOSED_CJK_LETTERS_AND_MONTHS,
 1892               CJK_COMPATIBILITY,
 1893               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 1894               YIJING_HEXAGRAM_SYMBOLS,
 1895               CJK_UNIFIED_IDEOGRAPHS,
 1896               YI_SYLLABLES,
 1897               YI_RADICALS,
 1898               null,
 1899               HANGUL_SYLLABLES,
 1900               null,
 1901               HIGH_SURROGATES,
 1902               HIGH_PRIVATE_USE_SURROGATES,
 1903               LOW_SURROGATES,
 1904               PRIVATE_USE_AREA,
 1905               CJK_COMPATIBILITY_IDEOGRAPHS,
 1906               ALPHABETIC_PRESENTATION_FORMS,
 1907               ARABIC_PRESENTATION_FORMS_A,
 1908               VARIATION_SELECTORS,
 1909               null,
 1910               COMBINING_HALF_MARKS,
 1911               CJK_COMPATIBILITY_FORMS,
 1912               SMALL_FORM_VARIANTS,
 1913               ARABIC_PRESENTATION_FORMS_B,
 1914               HALFWIDTH_AND_FULLWIDTH_FORMS,
 1915               SPECIALS,
 1916               LINEAR_B_SYLLABARY,
 1917               LINEAR_B_IDEOGRAMS,
 1918               AEGEAN_NUMBERS,
 1919               null,
 1920               OLD_ITALIC,
 1921               GOTHIC,
 1922               null,
 1923               UGARITIC,
 1924               null,
 1925               DESERET,
 1926               SHAVIAN,
 1927               OSMANYA,
 1928               null,
 1929               CYPRIOT_SYLLABARY,
 1930               null,
 1931               BYZANTINE_MUSICAL_SYMBOLS,
 1932               MUSICAL_SYMBOLS,
 1933               null,
 1934               TAI_XUAN_JING_SYMBOLS,
 1935               null,
 1936               MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
 1937               null,
 1938               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
 1939               null,
 1940               CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
 1941               null,
 1942               TAGS,
 1943               null,
 1944               VARIATION_SELECTORS_SUPPLEMENT,
 1945               null,
 1946               SUPPLEMENTARY_PRIVATE_USE_AREA_A,
 1947               SUPPLEMENTARY_PRIVATE_USE_AREA_B
 1948           };
 1949   
 1950   
 1951           /**
 1952            * Returns the object representing the Unicode block containing the
 1953            * given character, or <code>null</code> if the character is not a
 1954            * member of a defined block.
 1955            *
 1956                    * <p><b>Note:</b> This method cannot handle <a
 1957                    * href="Character.html#supplementary"> supplementary
 1958                    * characters</a>. To support all Unicode characters,
 1959                    * including supplementary characters, use the {@link
 1960                    * #of(int)} method.
 1961            *
 1962            * @param   c  The character in question
 1963            * @return  The <code>UnicodeBlock</code> instance representing the
 1964            *          Unicode block of which this character is a member, or
 1965            *          <code>null</code> if the character is not a member of any
 1966            *          Unicode block
 1967            */
 1968           public static UnicodeBlock of(char c) {
 1969               return of((int)c);
 1970           }
 1971   
 1972   
 1973           /**
 1974            * Returns the object representing the Unicode block
 1975            * containing the given character (Unicode code point), or
 1976            * <code>null</code> if the character is not a member of a
 1977            * defined block.
 1978            *
 1979                    * @param   codePoint the character (Unicode code point) in question.
 1980            * @return  The <code>UnicodeBlock</code> instance representing the
 1981            *          Unicode block of which this character is a member, or
 1982            *          <code>null</code> if the character is not a member of any
 1983            *          Unicode block
 1984                    * @exception IllegalArgumentException if the specified
 1985                    * <code>codePoint</code> is an invalid Unicode code point.
 1986                    * @see Character#isValidCodePoint(int)
 1987                    * @since   1.5
 1988            */
 1989           public static UnicodeBlock of(int codePoint) {
 1990               if (!isValidCodePoint(codePoint)) {
 1991                   throw new IllegalArgumentException();
 1992               }
 1993   
 1994               int top, bottom, current;
 1995               bottom = 0;
 1996               top = blockStarts.length;
 1997               current = top/2;
 1998   
 1999               // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
 2000               while (top - bottom > 1) {
 2001                   if (codePoint >= blockStarts[current]) {
 2002                       bottom = current;
 2003                   } else {
 2004                       top = current;
 2005                   }
 2006                   current = (top + bottom) / 2;
 2007               }
 2008               return blocks[current];
 2009           }
 2010   
 2011           /**
 2012            * Returns the UnicodeBlock with the given name. Block
 2013            * names are determined by The Unicode Standard. The file
 2014            * Blocks-&lt;version&gt;.txt defines blocks for a particular
 2015            * version of the standard. The {@link Character} class specifies
 2016            * the version of the standard that it supports.
 2017            * <p>
 2018            * This method accepts block names in the following forms:
 2019            * <ol>
 2020            * <li> Canonical block names as defined by the Unicode Standard.
 2021            * For example, the standard defines a "Basic Latin" block. Therefore, this
 2022            * method accepts "Basic Latin" as a valid block name. The documentation of
 2023            * each UnicodeBlock provides the canonical name.
 2024            * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
 2025            * is a valid block name for the "Basic Latin" block.
 2026            * <li>The text representation of each constant UnicodeBlock identifier.
 2027            * For example, this method will return the {@link #BASIC_LATIN} block if
 2028            * provided with the "BASIC_LATIN" name. This form replaces all spaces and
 2029            *  hyphens in the canonical name with underscores.
 2030            * </ol>
 2031            * Finally, character case is ignored for all of the valid block name forms.
 2032            * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
 2033            * The en_US locale's case mapping rules are used to provide case-insensitive
 2034            * string comparisons for block name validation.
 2035            * <p>
 2036            * If the Unicode Standard changes block names, both the previous and
 2037            * current names will be accepted.
 2038            *
 2039            * @param blockName A <code>UnicodeBlock</code> name.
 2040            * @return The <code>UnicodeBlock</code> instance identified
 2041            *         by <code>blockName</code>
 2042            * @throws IllegalArgumentException if <code>blockName</code> is an
 2043            *         invalid name
 2044            * @throws NullPointerException if <code>blockName</code> is null
 2045            * @since 1.5
 2046            */
 2047           public static final UnicodeBlock forName(String blockName) {
 2048               UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
 2049               if (block == null) {
 2050                   throw new IllegalArgumentException();
 2051               }
 2052               return block;
 2053           }
 2054       }
 2055   
 2056   
 2057       /**
 2058        * The value of the <code>Character</code>.
 2059        *
 2060        * @serial
 2061        */
 2062       private final char value;
 2063   
 2064       /** use serialVersionUID from JDK 1.0.2 for interoperability */
 2065       private static final long serialVersionUID = 3786198910865385080L;
 2066   
 2067       /**
 2068        * Constructs a newly allocated <code>Character</code> object that
 2069        * represents the specified <code>char</code> value.
 2070        *
 2071        * @param  value   the value to be represented by the
 2072        *                  <code>Character</code> object.
 2073        */
 2074       public Character(char value) {
 2075           this.value = value;
 2076       }
 2077   
 2078       private static class CharacterCache {
 2079           private CharacterCache(){}
 2080   
 2081           static final Character cache[] = new Character[127 + 1];
 2082   
 2083           static {
 2084               for(int i = 0; i < cache.length; i++)
 2085                   cache[i] = new Character((char)i);
 2086           }
 2087       }
 2088   
 2089       /**
 2090        * Returns a <tt>Character</tt> instance representing the specified
 2091        * <tt>char</tt> value.
 2092        * If a new <tt>Character</tt> instance is not required, this method
 2093        * should generally be used in preference to the constructor
 2094        * {@link #Character(char)}, as this method is likely to yield
 2095        * significantly better space and time performance by caching
 2096        * frequently requested values.
 2097        *
 2098        * @param  c a char value.
 2099        * @return a <tt>Character</tt> instance representing <tt>c</tt>.
 2100        * @since  1.5
 2101        */
 2102       public static Character valueOf(char c) {
 2103           if(c <= 127) { // must cache
 2104               return CharacterCache.cache[(int)c];
 2105           }
 2106           return new Character(c);
 2107       }
 2108   
 2109       /**
 2110        * Returns the value of this <code>Character</code> object.
 2111        * @return  the primitive <code>char</code> value represented by
 2112        *          this object.
 2113        */
 2114       public char charValue() {
 2115           return value;
 2116       }
 2117   
 2118       /**
 2119        * Returns a hash code for this <code>Character</code>.
 2120        * @return  a hash code value for this object.
 2121        */
 2122       public int hashCode() {
 2123           return (int)value;
 2124       }
 2125   
 2126       /**
 2127        * Compares this object against the specified object.
 2128        * The result is <code>true</code> if and only if the argument is not
 2129        * <code>null</code> and is a <code>Character</code> object that
 2130        * represents the same <code>char</code> value as this object.
 2131        *
 2132        * @param   obj   the object to compare with.
 2133        * @return  <code>true</code> if the objects are the same;
 2134        *          <code>false</code> otherwise.
 2135        */
 2136       public boolean equals(Object obj) {
 2137           if (obj instanceof Character) {
 2138               return value == ((Character)obj).charValue();
 2139           }
 2140           return false;
 2141       }
 2142   
 2143       /**
 2144        * Returns a <code>String</code> object representing this
 2145        * <code>Character</code>'s value.  The result is a string of
 2146        * length 1 whose sole component is the primitive
 2147        * <code>char</code> value represented by this
 2148        * <code>Character</code> object.
 2149        *
 2150        * @return  a string representation of this object.
 2151        */
 2152       public String toString() {
 2153           char buf[] = {value};
 2154           return String.valueOf(buf);
 2155       }
 2156   
 2157       /**
 2158        * Returns a <code>String</code> object representing the
 2159        * specified <code>char</code>.  The result is a string of length
 2160        * 1 consisting solely of the specified <code>char</code>.
 2161        *
 2162        * @param c the <code>char</code> to be converted
 2163        * @return the string representation of the specified <code>char</code>
 2164        * @since 1.4
 2165        */
 2166       public static String toString(char c) {
 2167           return String.valueOf(c);
 2168       }
 2169   
 2170       /**
 2171        * Determines whether the specified code point is a valid Unicode
 2172        * code point value in the range of <code>0x0000</code> to
 2173        * <code>0x10FFFF</code> inclusive. This method is equivalent to
 2174        * the expression:
 2175        *
 2176        * <blockquote><pre>
 2177        * codePoint >= 0x0000 && codePoint <= 0x10FFFF
 2178        * </pre></blockquote>
 2179        *
 2180        * @param  codePoint the Unicode code point to be tested
 2181        * @return <code>true</code> if the specified code point value
 2182        * is a valid code point value;
 2183        * <code>false</code> otherwise.
 2184        * @since  1.5
 2185        */
 2186       public static boolean isValidCodePoint(int codePoint) {
 2187           return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
 2188       }
 2189   
 2190       /**
 2191        * Determines whether the specified character (Unicode code point)
 2192        * is in the supplementary character range. The method call is
 2193        * equivalent to the expression:
 2194        * <blockquote><pre>
 2195        * codePoint >= 0x10000 && codePoint <= 0x10FFFF
 2196        * </pre></blockquote>
 2197        *
 2198        * @param  codePoint the character (Unicode code point) to be tested
 2199        * @return <code>true</code> if the specified character is in the Unicode
 2200        *         supplementary character range; <code>false</code> otherwise.
 2201        * @since  1.5
 2202        */
 2203       public static boolean isSupplementaryCodePoint(int codePoint) {
 2204           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
 2205               && codePoint <= MAX_CODE_POINT;
 2206       }
 2207   
 2208       /**
 2209        * Determines if the given <code>char</code> value is a
 2210        * high-surrogate code unit (also known as <i>leading-surrogate
 2211        * code unit</i>). Such values do not represent characters by
 2212        * themselves, but are used in the representation of <a
 2213        * href="#supplementary">supplementary characters</a> in the
 2214        * UTF-16 encoding.
 2215        *
 2216        * <p>This method returns <code>true</code> if and only if
 2217        * <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF'
 2218        * </pre></blockquote>
 2219        * is <code>true</code>.
 2220        *
 2221        * @param   ch   the <code>char</code> value to be tested.
 2222        * @return  <code>true</code> if the <code>char</code> value
 2223        *          is between '&#92;uD800' and '&#92;uDBFF' inclusive;
 2224        *          <code>false</code> otherwise.
 2225        * @see     java.lang.Character#isLowSurrogate(char)
 2226        * @see     Character.UnicodeBlock#of(int)
 2227        * @since   1.5
 2228        */
 2229       public static boolean isHighSurrogate(char ch) {
 2230           return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
 2231       }
 2232   
 2233       /**
 2234        * Determines if the given <code>char</code> value is a
 2235        * low-surrogate code unit (also known as <i>trailing-surrogate code
 2236        * unit</i>). Such values do not represent characters by themselves,
 2237        * but are used in the representation of <a
 2238        * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
 2239        *
 2240        * <p> This method returns <code>true</code> if and only if
 2241        * <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF'
 2242        * </pre></blockquote> is <code>true</code>.
 2243        *
 2244        * @param   ch   the <code>char</code> value to be tested.
 2245        * @return  <code>true</code> if the <code>char</code> value
 2246        *          is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
 2247        *          <code>false</code> otherwise.
 2248        * @see java.lang.Character#isHighSurrogate(char)
 2249        * @since   1.5
 2250        */
 2251       public static boolean isLowSurrogate(char ch) {
 2252           return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
 2253       }
 2254   
 2255       /**
 2256        * Determines whether the specified pair of <code>char</code>
 2257        * values is a valid surrogate pair. This method is equivalent to
 2258        * the expression:
 2259        * <blockquote><pre>
 2260        * isHighSurrogate(high) && isLowSurrogate(low)
 2261        * </pre></blockquote>
 2262        *
 2263        * @param  high the high-surrogate code value to be tested
 2264        * @param  low the low-surrogate code value to be tested
 2265        * @return <code>true</code> if the specified high and
 2266        * low-surrogate code values represent a valid surrogate pair;
 2267        * <code>false</code> otherwise.
 2268        * @since  1.5
 2269        */
 2270       public static boolean isSurrogatePair(char high, char low) {
 2271           return isHighSurrogate(high) && isLowSurrogate(low);
 2272       }
 2273   
 2274       /**
 2275        * Determines the number of <code>char</code> values needed to
 2276        * represent the specified character (Unicode code point). If the
 2277        * specified character is equal to or greater than 0x10000, then
 2278        * the method returns 2. Otherwise, the method returns 1.
 2279        *
 2280        * <p>This method doesn't validate the specified character to be a
 2281        * valid Unicode code point. The caller must validate the
 2282        * character value using {@link #isValidCodePoint(int) isValidCodePoint}
 2283        * if necessary.
 2284        *
 2285        * @param   codePoint the character (Unicode code point) to be tested.
 2286        * @return  2 if the character is a valid supplementary character; 1 otherwise.
 2287        * @see     #isSupplementaryCodePoint(int)
 2288        * @since   1.5
 2289        */
 2290       public static int charCount(int codePoint) {
 2291           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
 2292       }
 2293   
 2294       /**
 2295        * Converts the specified surrogate pair to its supplementary code
 2296        * point value. This method does not validate the specified
 2297        * surrogate pair. The caller must validate it using {@link
 2298        * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
 2299        *
 2300        * @param  high the high-surrogate code unit
 2301        * @param  low the low-surrogate code unit
 2302        * @return the supplementary code point composed from the
 2303        *         specified surrogate pair.
 2304        * @since  1.5
 2305        */
 2306       public static int toCodePoint(char high, char low) {
 2307           return ((high - MIN_HIGH_SURROGATE) << 10)
 2308               + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
 2309       }
 2310   
 2311       /**
 2312        * Returns the code point at the given index of the
 2313        * <code>CharSequence</code>. If the <code>char</code> value at
 2314        * the given index in the <code>CharSequence</code> is in the
 2315        * high-surrogate range, the following index is less than the
 2316        * length of the <code>CharSequence</code>, and the
 2317        * <code>char</code> value at the following index is in the
 2318        * low-surrogate range, then the supplementary code point
 2319        * corresponding to this surrogate pair is returned. Otherwise,
 2320        * the <code>char</code> value at the given index is returned.
 2321        *
 2322        * @param seq a sequence of <code>char</code> values (Unicode code
 2323        * units)
 2324        * @param index the index to the <code>char</code> values (Unicode
 2325        * code units) in <code>seq</code> to be converted
 2326        * @return the Unicode code point at the given index
 2327        * @exception NullPointerException if <code>seq</code> is null.
 2328        * @exception IndexOutOfBoundsException if the value
 2329        * <code>index</code> is negative or not less than
 2330        * {@link CharSequence#length() seq.length()}.
 2331        * @since  1.5
 2332        */
 2333       public static int codePointAt(CharSequence seq, int index) {
 2334           char c1 = seq.charAt(index++);
 2335           if (isHighSurrogate(c1)) {
 2336               if (index < seq.length()) {
 2337                   char c2 = seq.charAt(index);
 2338                   if (isLowSurrogate(c2)) {
 2339                       return toCodePoint(c1, c2);
 2340                   }
 2341               }
 2342           }
 2343           return c1;
 2344       }
 2345   
 2346       /**
 2347        * Returns the code point at the given index of the
 2348        * <code>char</code> array. If the <code>char</code> value at
 2349        * the given index in the <code>char</code> array is in the
 2350        * high-surrogate range, the following index is less than the
 2351        * length of the <code>char</code> array, and the
 2352        * <code>char</code> value at the following index is in the
 2353        * low-surrogate range, then the supplementary code point
 2354        * corresponding to this surrogate pair is returned. Otherwise,
 2355        * the <code>char</code> value at the given index is returned.
 2356        *
 2357        * @param a the <code>char</code> array
 2358        * @param index the index to the <code>char</code> values (Unicode
 2359        * code units) in the <code>char</code> array to be converted
 2360        * @return the Unicode code point at the given index
 2361        * @exception NullPointerException if <code>a</code> is null.
 2362        * @exception IndexOutOfBoundsException if the value
 2363        * <code>index</code> is negative or not less than
 2364        * the length of the <code>char</code> array.
 2365        * @since  1.5
 2366        */
 2367       public static int codePointAt(char[] a, int index) {
 2368           return codePointAtImpl(a, index, a.length);
 2369       }
 2370   
 2371       /**
 2372        * Returns the code point at the given index of the
 2373        * <code>char</code> array, where only array elements with
 2374        * <code>index</code> less than <code>limit</code> can be used. If
 2375        * the <code>char</code> value at the given index in the
 2376        * <code>char</code> array is in the high-surrogate range, the
 2377        * following index is less than the <code>limit</code>, and the
 2378        * <code>char</code> value at the following index is in the
 2379        * low-surrogate range, then the supplementary code point
 2380        * corresponding to this surrogate pair is returned. Otherwise,
 2381        * the <code>char</code> value at the given index is returned.
 2382        *
 2383        * @param a the <code>char</code> array
 2384        * @param index the index to the <code>char</code> values (Unicode
 2385        * code units) in the <code>char</code> array to be converted
 2386        * @param limit the index after the last array element that can be used in the
 2387        * <code>char</code> array
 2388        * @return the Unicode code point at the given index
 2389        * @exception NullPointerException if <code>a</code> is null.
 2390        * @exception IndexOutOfBoundsException if the <code>index</code>
 2391        * argument is negative or not less than the <code>limit</code>
 2392        * argument, or if the <code>limit</code> argument is negative or
 2393        * greater than the length of the <code>char</code> array.
 2394        * @since  1.5
 2395        */
 2396       public static int codePointAt(char[] a, int index, int limit) {
 2397           if (index >= limit || limit < 0 || limit > a.length) {
 2398               throw new IndexOutOfBoundsException();
 2399           }
 2400           return codePointAtImpl(a, index, limit);
 2401       }
 2402   
 2403       static int codePointAtImpl(char[] a, int index, int limit) {
 2404           char c1 = a[index++];
 2405           if (isHighSurrogate(c1)) {
 2406               if (index < limit) {
 2407                   char c2 = a[index];
 2408                   if (isLowSurrogate(c2)) {
 2409                       return toCodePoint(c1, c2);
 2410                   }
 2411               }
 2412           }
 2413           return c1;
 2414       }
 2415   
 2416       /**
 2417        * Returns the code point precedi