Save This Page
Home » Open-JDK-6.b17-src » java » lang » [javadoc | source]
    1   /*
    2    * Copyright 2002-2006 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   
   26   package java.lang;
   27   import java.util.Map;
   28   import java.util.HashMap;
   29   import java.util.Locale;
   30   
   31   /**
   32    * The <code>Character</code> class wraps a value of the primitive
   33    * type <code>char</code> in an object. An object of type
   34    * <code>Character</code> contains a single field whose type is
   35    * <code>char</code>.
   36    * <p>
   37    * In addition, this class provides several methods for determining
   38    * a character's category (lowercase letter, digit, etc.) and for converting
   39    * characters from uppercase to lowercase and vice versa.
   40    * <p>
   41    * Character information is based on the Unicode Standard, version 4.0.
   42    * <p>
   43    * The methods and data of class <code>Character</code> are defined by
   44    * the information in the <i>UnicodeData</i> file that is part of the
   45    * Unicode Character Database maintained by the Unicode
   46    * Consortium. This file specifies various properties including name
   47    * and general category for every defined Unicode code point or
   48    * character range.
   49    * <p>
   50    * The file and its description are available from the Unicode Consortium at:
   51    * <ul>
   52    * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
   53    * </ul>
   54    *
   55    * <h4><a name="unicode">Unicode Character Representations</a></h4>
   56    *
   57    * <p>The <code>char</code> data type (and therefore the value that a
   58    * <code>Character</code> object encapsulates) are based on the
   59    * original Unicode specification, which defined characters as
   60    * fixed-width 16-bit entities. The Unicode standard has since been
   61    * changed to allow for characters whose representation requires more
   62    * than 16 bits.  The range of legal <em>code point</em>s is now
   63    * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
   64    * (Refer to the <a
   65    * href="http://www.unicode.org/reports/tr27/#notation"><i>
   66    * definition</i></a> of the U+<i>n</i> notation in the Unicode
   67    * standard.)
   68    *
   69    * <p>The set of characters from U+0000 to U+FFFF is sometimes
   70    * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
   71    * name="supplementary">Characters</a> whose code points are greater
   72    * than U+FFFF are called <em>supplementary character</em>s.  The Java
   73    * 2 platform uses the UTF-16 representation in <code>char</code>
   74    * arrays and in the <code>String</code> and <code>StringBuffer</code>
   75    * classes. In this representation, supplementary characters are
   76    * represented as a pair of <code>char</code> values, the first from
   77    * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
   78    * second from the <em>low-surrogates</em> range
   79    * (&#92;uDC00-&#92;uDFFF).
   80    *
   81    * <p>A <code>char</code> value, therefore, represents Basic
   82    * Multilingual Plane (BMP) code points, including the surrogate
   83    * code points, or code units of the UTF-16 encoding. An
   84    * <code>int</code> value represents all Unicode code points,
   85    * including supplementary code points. The lower (least significant)
   86    * 21 bits of <code>int</code> are used to represent Unicode code
   87    * points and the upper (most significant) 11 bits must be zero.
   88    * Unless otherwise specified, the behavior with respect to
   89    * supplementary characters and surrogate <code>char</code> values is
   90    * as follows:
   91    *
   92    * <ul>
   93    * <li>The methods that only accept a <code>char</code> value cannot support
   94    * supplementary characters. They treat <code>char</code> values from the
   95    * surrogate ranges as undefined characters. For example,
   96    * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
   97    * this specific value if followed by any low-surrogate value in a string
   98    * would represent a letter.
   99    *
  100    * <li>The methods that accept an <code>int</code> value support all
  101    * Unicode characters, including supplementary characters. For
  102    * example, <code>Character.isLetter(0x2F81A)</code> returns
  103    * <code>true</code> because the code point value represents a letter
  104    * (a CJK ideograph).
  105    * </ul>
  106    *
  107    * <p>In the Java SE API documentation, <em>Unicode code point</em> is
  108    * used for character values in the range between U+0000 and U+10FFFF,
  109    * and <em>Unicode code unit</em> is used for 16-bit
  110    * <code>char</code> values that are code units of the <em>UTF-16</em>
  111    * encoding. For more information on Unicode terminology, refer to the
  112    * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
  113    *
  114    * @author  Lee Boynton
  115    * @author  Guy Steele
  116    * @author  Akira Tanaka
  117    * @since   1.0
  118    */
  119   public final
  120   class Character extends Object implements java.io.Serializable, Comparable<Character> {
  121       /**
  122        * The minimum radix available for conversion to and from strings.
  123        * The constant value of this field is the smallest value permitted
  124        * for the radix argument in radix-conversion methods such as the
  125        * <code>digit</code> method, the <code>forDigit</code>
  126        * method, and the <code>toString</code> method of class
  127        * <code>Integer</code>.
  128        *
  129        * @see     java.lang.Character#digit(char, int)
  130        * @see     java.lang.Character#forDigit(int, int)
  131        * @see     java.lang.Integer#toString(int, int)
  132        * @see     java.lang.Integer#valueOf(java.lang.String)
  133        */
  134       public static final int MIN_RADIX = 2;
  135   
  136       /**
  137        * The maximum radix available for conversion to and from strings.
  138        * The constant value of this field is the largest value permitted
  139        * for the radix argument in radix-conversion methods such as the
  140        * <code>digit</code> method, the <code>forDigit</code>
  141        * method, and the <code>toString</code> method of class
  142        * <code>Integer</code>.
  143        *
  144        * @see     java.lang.Character#digit(char, int)
  145        * @see     java.lang.Character#forDigit(int, int)
  146        * @see     java.lang.Integer#toString(int, int)
  147        * @see     java.lang.Integer#valueOf(java.lang.String)
  148        */
  149       public static final int MAX_RADIX = 36;
  150   
  151       /**
  152        * The constant value of this field is the smallest value of type
  153        * <code>char</code>, <code>'&#92;u0000'</code>.
  154        *
  155        * @since   1.0.2
  156        */
  157       public static final char   MIN_VALUE = '\u0000';
  158   
  159       /**
  160        * The constant value of this field is the largest value of type
  161        * <code>char</code>, <code>'&#92;uFFFF'</code>.
  162        *
  163        * @since   1.0.2
  164        */
  165       public static final char   MAX_VALUE = '\uffff';
  166   
  167       /**
  168        * The <code>Class</code> instance representing the primitive type
  169        * <code>char</code>.
  170        *
  171        * @since   1.1
  172        */
  173       public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
  174   
  175      /*
  176       * Normative general types
  177       */
  178   
  179      /*
  180       * General character types
  181       */
  182   
  183      /**
  184       * General category "Cn" in the Unicode specification.
  185       * @since   1.1
  186       */
  187       public static final byte
  188           UNASSIGNED                  = 0;
  189   
  190      /**
  191       * General category "Lu" in the Unicode specification.
  192       * @since   1.1
  193       */
  194       public static final byte
  195           UPPERCASE_LETTER            = 1;
  196   
  197      /**
  198       * General category "Ll" in the Unicode specification.
  199       * @since   1.1
  200       */
  201       public static final byte
  202           LOWERCASE_LETTER            = 2;
  203   
  204      /**
  205       * General category "Lt" in the Unicode specification.
  206       * @since   1.1
  207       */
  208       public static final byte
  209           TITLECASE_LETTER            = 3;
  210   
  211      /**
  212       * General category "Lm" in the Unicode specification.
  213       * @since   1.1
  214       */
  215       public static final byte
  216           MODIFIER_LETTER             = 4;
  217   
  218      /**
  219       * General category "Lo" in the Unicode specification.
  220       * @since   1.1
  221       */
  222       public static final byte
  223           OTHER_LETTER                = 5;
  224   
  225      /**
  226       * General category "Mn" in the Unicode specification.
  227       * @since   1.1
  228       */
  229       public static final byte
  230           NON_SPACING_MARK            = 6;
  231   
  232      /**
  233       * General category "Me" in the Unicode specification.
  234       * @since   1.1
  235       */
  236       public static final byte
  237           ENCLOSING_MARK              = 7;
  238   
  239      /**
  240       * General category "Mc" in the Unicode specification.
  241       * @since   1.1
  242       */
  243       public static final byte
  244           COMBINING_SPACING_MARK      = 8;
  245   
  246      /**
  247       * General category "Nd" in the Unicode specification.
  248       * @since   1.1
  249       */
  250       public static final byte
  251           DECIMAL_DIGIT_NUMBER        = 9;
  252   
  253      /**
  254       * General category "Nl" in the Unicode specification.
  255       * @since   1.1
  256       */
  257       public static final byte
  258           LETTER_NUMBER               = 10;
  259   
  260      /**
  261       * General category "No" in the Unicode specification.
  262       * @since   1.1
  263       */
  264       public static final byte
  265           OTHER_NUMBER                = 11;
  266   
  267      /**
  268       * General category "Zs" in the Unicode specification.
  269       * @since   1.1
  270       */
  271       public static final byte
  272           SPACE_SEPARATOR             = 12;
  273   
  274      /**
  275       * General category "Zl" in the Unicode specification.
  276       * @since   1.1
  277       */
  278       public static final byte
  279           LINE_SEPARATOR              = 13;
  280   
  281      /**
  282       * General category "Zp" in the Unicode specification.
  283       * @since   1.1
  284       */
  285       public static final byte
  286           PARAGRAPH_SEPARATOR         = 14;
  287   
  288      /**
  289       * General category "Cc" in the Unicode specification.
  290       * @since   1.1
  291       */
  292       public static final byte
  293           CONTROL                     = 15;
  294   
  295      /**
  296       * General category "Cf" in the Unicode specification.
  297       * @since   1.1
  298       */
  299       public static final byte
  300           FORMAT                      = 16;
  301   
  302      /**
  303       * General category "Co" in the Unicode specification.
  304       * @since   1.1
  305       */
  306       public static final byte
  307           PRIVATE_USE                 = 18;
  308   
  309      /**
  310       * General category "Cs" in the Unicode specification.
  311       * @since   1.1
  312       */
  313       public static final byte
  314           SURROGATE                   = 19;
  315   
  316      /**
  317       * General category "Pd" in the Unicode specification.
  318       * @since   1.1
  319       */
  320       public static final byte
  321           DASH_PUNCTUATION            = 20;
  322   
  323      /**
  324       * General category "Ps" in the Unicode specification.
  325       * @since   1.1
  326       */
  327       public static final byte
  328           START_PUNCTUATION           = 21;
  329   
  330      /**
  331       * General category "Pe" in the Unicode specification.
  332       * @since   1.1
  333       */
  334       public static final byte
  335           END_PUNCTUATION             = 22;
  336   
  337      /**
  338       * General category "Pc" in the Unicode specification.
  339       * @since   1.1
  340       */
  341       public static final byte
  342           CONNECTOR_PUNCTUATION       = 23;
  343   
  344      /**
  345       * General category "Po" in the Unicode specification.
  346       * @since   1.1
  347       */
  348       public static final byte
  349           OTHER_PUNCTUATION           = 24;
  350   
  351      /**
  352       * General category "Sm" in the Unicode specification.
  353       * @since   1.1
  354       */
  355       public static final byte
  356           MATH_SYMBOL                 = 25;
  357   
  358      /**
  359       * General category "Sc" in the Unicode specification.
  360       * @since   1.1
  361       */
  362       public static final byte
  363           CURRENCY_SYMBOL             = 26;
  364   
  365      /**
  366       * General category "Sk" in the Unicode specification.
  367       * @since   1.1
  368       */
  369       public static final byte
  370           MODIFIER_SYMBOL             = 27;
  371   
  372      /**
  373       * General category "So" in the Unicode specification.
  374       * @since   1.1
  375       */
  376       public static final byte
  377           OTHER_SYMBOL                = 28;
  378   
  379      /**
  380       * General category "Pi" in the Unicode specification.
  381       * @since   1.4
  382       */
  383       public static final byte
  384           INITIAL_QUOTE_PUNCTUATION   = 29;
  385   
  386      /**
  387       * General category "Pf" in the Unicode specification.
  388       * @since   1.4
  389       */
  390       public static final byte
  391           FINAL_QUOTE_PUNCTUATION     = 30;
  392   
  393       /**
  394        * Error flag. Use int (code point) to avoid confusion with U+FFFF.
  395        */
  396        static final int ERROR = 0xFFFFFFFF;
  397   
  398   
  399       /**
  400        * Undefined bidirectional character type. Undefined <code>char</code>
  401        * values have undefined directionality in the Unicode specification.
  402        * @since 1.4
  403        */
  404        public static final byte DIRECTIONALITY_UNDEFINED = -1;
  405   
  406       /**
  407        * Strong bidirectional character type "L" in the Unicode specification.
  408        * @since 1.4
  409        */
  410       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
  411   
  412       /**
  413        * Strong bidirectional character type "R" in the Unicode specification.
  414        * @since 1.4
  415        */
  416       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
  417   
  418       /**
  419       * Strong bidirectional character type "AL" in the Unicode specification.
  420        * @since 1.4
  421        */
  422       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
  423   
  424       /**
  425        * Weak bidirectional character type "EN" in the Unicode specification.
  426        * @since 1.4
  427        */
  428       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
  429   
  430       /**
  431        * Weak bidirectional character type "ES" in the Unicode specification.
  432        * @since 1.4
  433        */
  434       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
  435   
  436       /**
  437        * Weak bidirectional character type "ET" in the Unicode specification.
  438        * @since 1.4
  439        */
  440       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
  441   
  442       /**
  443        * Weak bidirectional character type "AN" in the Unicode specification.
  444        * @since 1.4
  445        */
  446       public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
  447   
  448       /**
  449        * Weak bidirectional character type "CS" in the Unicode specification.
  450        * @since 1.4
  451        */
  452       public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
  453   
  454       /**
  455        * Weak bidirectional character type "NSM" in the Unicode specification.
  456        * @since 1.4
  457        */
  458       public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
  459   
  460       /**
  461        * Weak bidirectional character type "BN" in the Unicode specification.
  462        * @since 1.4
  463        */
  464       public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
  465   
  466       /**
  467        * Neutral bidirectional character type "B" in the Unicode specification.
  468        * @since 1.4
  469        */
  470       public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
  471   
  472       /**
  473        * Neutral bidirectional character type "S" in the Unicode specification.
  474        * @since 1.4
  475        */
  476       public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
  477   
  478       /**
  479        * Neutral bidirectional character type "WS" in the Unicode specification.
  480        * @since 1.4
  481        */
  482       public static final byte DIRECTIONALITY_WHITESPACE = 12;
  483   
  484       /**
  485        * Neutral bidirectional character type "ON" in the Unicode specification.
  486        * @since 1.4
  487        */
  488       public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
  489   
  490       /**
  491        * Strong bidirectional character type "LRE" in the Unicode specification.
  492        * @since 1.4
  493        */
  494       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
  495   
  496       /**
  497        * Strong bidirectional character type "LRO" in the Unicode specification.
  498        * @since 1.4
  499        */
  500       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
  501   
  502       /**
  503        * Strong bidirectional character type "RLE" in the Unicode specification.
  504        * @since 1.4
  505        */
  506       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
  507   
  508       /**
  509        * Strong bidirectional character type "RLO" in the Unicode specification.
  510        * @since 1.4
  511        */
  512       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
  513   
  514       /**
  515        * Weak bidirectional character type "PDF" in the Unicode specification.
  516        * @since 1.4
  517        */
  518       public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
  519   
  520       /**
  521        * The minimum value of a Unicode high-surrogate code unit in the
  522        * UTF-16 encoding. A high-surrogate is also known as a
  523        * <i>leading-surrogate</i>.
  524        *
  525        * @since 1.5
  526        */
  527       public static final char MIN_HIGH_SURROGATE = '\uD800';
  528   
  529       /**
  530        * The maximum value of a Unicode high-surrogate code unit in the
  531        * UTF-16 encoding. A high-surrogate is also known as a
  532        * <i>leading-surrogate</i>.
  533        *
  534        * @since 1.5
  535        */
  536       public static final char MAX_HIGH_SURROGATE = '\uDBFF';
  537   
  538       /**
  539        * The minimum value of a Unicode low-surrogate code unit in the
  540        * UTF-16 encoding. A low-surrogate is also known as a
  541        * <i>trailing-surrogate</i>.
  542        *
  543        * @since 1.5
  544        */
  545       public static final char MIN_LOW_SURROGATE  = '\uDC00';
  546   
  547       /**
  548        * The maximum value of a Unicode low-surrogate code unit in the
  549        * UTF-16 encoding. A low-surrogate is also known as a
  550        * <i>trailing-surrogate</i>.
  551        *
  552        * @since 1.5
  553        */
  554       public static final char MAX_LOW_SURROGATE  = '\uDFFF';
  555   
  556       /**
  557        * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
  558        *
  559        * @since 1.5
  560        */
  561       public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
  562   
  563       /**
  564        * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
  565        *
  566        * @since 1.5
  567        */
  568       public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
  569   
  570       /**
  571        * The minimum value of a supplementary code point.
  572        *
  573        * @since 1.5
  574        */
  575       public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
  576   
  577       /**
  578        * The minimum value of a Unicode code point.
  579        *
  580        * @since 1.5
  581        */
  582       public static final int MIN_CODE_POINT = 0x000000;
  583   
  584       /**
  585        * The maximum value of a Unicode code point.
  586        *
  587        * @since 1.5
  588        */
  589       public static final int MAX_CODE_POINT = 0x10ffff;
  590   
  591   
  592       /**
  593        * Instances of this class represent particular subsets of the Unicode
  594        * character set.  The only family of subsets defined in the
  595        * <code>Character</code> class is <code>{@link Character.UnicodeBlock
  596        * UnicodeBlock}</code>.  Other portions of the Java API may define other
  597        * subsets for their own purposes.
  598        *
  599        * @since 1.2
  600        */
  601       public static class Subset  {
  602   
  603           private String name;
  604   
  605           /**
  606            * Constructs a new <code>Subset</code> instance.
  607            *
  608            * @exception NullPointerException if name is <code>null</code>
  609            * @param  name  The name of this subset
  610            */
  611           protected Subset(String name) {
  612               if (name == null) {
  613                   throw new NullPointerException("name");
  614               }
  615               this.name = name;
  616           }
  617   
  618           /**
  619            * Compares two <code>Subset</code> objects for equality.
  620            * This method returns <code>true</code> if and only if
  621            * <code>this</code> and the argument refer to the same
  622            * object; since this method is <code>final</code>, this
  623            * guarantee holds for all subclasses.
  624            */
  625           public final boolean equals(Object obj) {
  626               return (this == obj);
  627           }
  628   
  629           /**
  630            * Returns the standard hash code as defined by the
  631            * <code>{@link Object#hashCode}</code> method.  This method
  632            * is <code>final</code> in order to ensure that the
  633            * <code>equals</code> and <code>hashCode</code> methods will
  634            * be consistent in all subclasses.
  635            */
  636           public final int hashCode() {
  637               return super.hashCode();
  638           }
  639   
  640           /**
  641            * Returns the name of this subset.
  642            */
  643           public final String toString() {
  644               return name;
  645           }
  646       }
  647   
  648       /**
  649        * A family of character subsets representing the character blocks in the
  650        * Unicode specification. Character blocks generally define characters
  651        * used for a specific script or purpose. A character is contained by
  652        * at most one Unicode block.
  653        *
  654        * @since 1.2
  655        */
  656       public static final class UnicodeBlock extends Subset {
  657   
  658           private static Map map = new HashMap();
  659   
  660           /**
  661            * Create a UnicodeBlock with the given identifier name.
  662            * This name must be the same as the block identifier.
  663            */
  664           private UnicodeBlock(String idName) {
  665               super(idName);
  666               map.put(idName.toUpperCase(Locale.US), this);
  667           }
  668   
  669           /**
  670            * Create a UnicodeBlock with the given identifier name and
  671            * alias name.
  672            */
  673           private UnicodeBlock(String idName, String alias) {
  674               this(idName);
  675               map.put(alias.toUpperCase(Locale.US), this);
  676           }
  677   
  /**
   * Creates a UnicodeBlock with the given identifier name and any
   * number of alias names.  Every alias, upper-cased with
   * <code>Locale.US</code>, is recorded in the name map in addition to
   * the identifier.
   *
   * @param idName    the identifier name of the block
   * @param aliasName additional names for the block; a
   *                  <code>null</code> array adds no aliases
   */
  private UnicodeBlock(String idName, String[] aliasName) {
      this(idName);
      if (aliasName == null) {
          return;
      }
      for (String alias : aliasName) {
          map.put(alias.toUpperCase(Locale.US), this);
      }
  }
  690   
  691           /**
  692            * Constant for the "Basic Latin" Unicode character block.
  693            * @since 1.2
  694            */
  695           public static final UnicodeBlock  BASIC_LATIN =
  696               new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
  697   
  698           /**
  699            * Constant for the "Latin-1 Supplement" Unicode character block.
  700            * @since 1.2
  701            */
  702           public static final UnicodeBlock LATIN_1_SUPPLEMENT =
  703               new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
  704   
  705           /**
  706            * Constant for the "Latin Extended-A" Unicode character block.
  707            * @since 1.2
  708            */
  709           public static final UnicodeBlock LATIN_EXTENDED_A =
  710               new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
  711   
  712           /**
  713            * Constant for the "Latin Extended-B" Unicode character block.
  714            * @since 1.2
  715            */
  716           public static final UnicodeBlock LATIN_EXTENDED_B =
  717               new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
  718   
  719           /**
  720            * Constant for the "IPA Extensions" Unicode character block.
  721            * @since 1.2
  722            */
  723           public static final UnicodeBlock IPA_EXTENSIONS =
  724               new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
  725   
  726           /**
  727            * Constant for the "Spacing Modifier Letters" Unicode character block.
  728            * @since 1.2
  729            */
  730           public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
  731               new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
  732                                                                           "SpacingModifierLetters"});
  733   
  734           /**
  735            * Constant for the "Combining Diacritical Marks" Unicode character block.
  736            * @since 1.2
  737            */
  738           public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
  739               new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
  740                                                                             "CombiningDiacriticalMarks" });
  741   
  742           /**
  743            * Constant for the "Greek and Coptic" Unicode character block.
  744            * <p>
  745            * This block was previously known as the "Greek" block.
  746            *
  747            * @since 1.2
  748            */
  749           public static final UnicodeBlock GREEK
  750               = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
  751   
  752           /**
  753            * Constant for the "Cyrillic" Unicode character block.
  754            * @since 1.2
  755            */
  756           public static final UnicodeBlock CYRILLIC =
  757               new UnicodeBlock("CYRILLIC");
  758   
  759           /**
  760            * Constant for the "Armenian" Unicode character block.
  761            * @since 1.2
  762            */
  763           public static final UnicodeBlock ARMENIAN =
  764               new UnicodeBlock("ARMENIAN");
  765   
  766           /**
  767            * Constant for the "Hebrew" Unicode character block.
  768            * @since 1.2
  769            */
  770           public static final UnicodeBlock HEBREW =
  771               new UnicodeBlock("HEBREW");
  772   
  773           /**
  774            * Constant for the "Arabic" Unicode character block.
  775            * @since 1.2
  776            */
  777           public static final UnicodeBlock ARABIC =
  778               new UnicodeBlock("ARABIC");
  779   
  780           /**
  781            * Constant for the "Devanagari" Unicode character block.
  782            * @since 1.2
  783            */
  784           public static final UnicodeBlock DEVANAGARI =
  785               new UnicodeBlock("DEVANAGARI");
  786   
  787           /**
  788            * Constant for the "Bengali" Unicode character block.
  789            * @since 1.2
  790            */
  791           public static final UnicodeBlock BENGALI =
  792               new UnicodeBlock("BENGALI");
  793   
  794           /**
  795            * Constant for the "Gurmukhi" Unicode character block.
  796            * @since 1.2
  797            */
  798           public static final UnicodeBlock GURMUKHI =
  799               new UnicodeBlock("GURMUKHI");
  800   
  801           /**
  802            * Constant for the "Gujarati" Unicode character block.
  803            * @since 1.2
  804            */
  805           public static final UnicodeBlock GUJARATI =
  806               new UnicodeBlock("GUJARATI");
  807   
  808           /**
  809            * Constant for the "Oriya" Unicode character block.
  810            * @since 1.2
  811            */
  812           public static final UnicodeBlock ORIYA =
  813               new UnicodeBlock("ORIYA");
  814   
  815           /**
  816            * Constant for the "Tamil" Unicode character block.
  817            * @since 1.2
  818            */
  819           public static final UnicodeBlock TAMIL =
  820               new UnicodeBlock("TAMIL");
  821   
  822           /**
  823            * Constant for the "Telugu" Unicode character block.
  824            * @since 1.2
  825            */
  826           public static final UnicodeBlock TELUGU =
  827               new UnicodeBlock("TELUGU");
  828   
  829           /**
  830            * Constant for the "Kannada" Unicode character block.
  831            * @since 1.2
  832            */
  833           public static final UnicodeBlock KANNADA =
  834               new UnicodeBlock("KANNADA");
  835   
  836           /**
  837            * Constant for the "Malayalam" Unicode character block.
  838            * @since 1.2
  839            */
  840           public static final UnicodeBlock MALAYALAM =
  841               new UnicodeBlock("MALAYALAM");
  842   
  843           /**
  844            * Constant for the "Thai" Unicode character block.
  845            * @since 1.2
  846            */
  847           public static final UnicodeBlock THAI =
  848               new UnicodeBlock("THAI");
  849   
  850           /**
  851            * Constant for the "Lao" Unicode character block.
  852            * @since 1.2
  853            */
  854           public static final UnicodeBlock LAO =
  855               new UnicodeBlock("LAO");
  856   
  857           /**
  858            * Constant for the "Tibetan" Unicode character block.
  859            * @since 1.2
  860            */
  861           public static final UnicodeBlock TIBETAN =
  862               new UnicodeBlock("TIBETAN");
  863   
  864           /**
  865            * Constant for the "Georgian" Unicode character block.
  866            * @since 1.2
  867            */
  868           public static final UnicodeBlock GEORGIAN =
  869               new UnicodeBlock("GEORGIAN");
  870   
  871           /**
  872            * Constant for the "Hangul Jamo" Unicode character block.
  873            * @since 1.2
  874            */
  875           public static final UnicodeBlock HANGUL_JAMO =
  876               new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
  877   
  878           /**
  879            * Constant for the "Latin Extended Additional" Unicode character block.
  880            * @since 1.2
  881            */
  882           public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
  883               new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
  884                                                                           "LatinExtendedAdditional"});
  885   
  886           /**
  887            * Constant for the "Greek Extended" Unicode character block.
  888            * @since 1.2
  889            */
  890           public static final UnicodeBlock GREEK_EXTENDED =
  891               new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
  892   
  893           /**
  894            * Constant for the "General Punctuation" Unicode character block.
  895            * @since 1.2
  896            */
  897           public static final UnicodeBlock GENERAL_PUNCTUATION =
  898               new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
  899   
  900           /**
  901            * Constant for the "Superscripts and Subscripts" Unicode character block.
  902            * @since 1.2
  903            */
  904           public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
  905               new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
  906                                                                             "SuperscriptsandSubscripts" });
  907   
  908           /**
  909            * Constant for the "Currency Symbols" Unicode character block.
  910            * @since 1.2
  911            */
  912           public static final UnicodeBlock CURRENCY_SYMBOLS =
  913               new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
  914   
  915           /**
  916            * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
  917            * <p>
  918            * This block was previously known as "Combining Marks for Symbols".
  919            * @since 1.2
  920            */
  921           public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
  922               new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
  923                                                                                                                                                     "CombiningDiacriticalMarksforSymbols",
  924                                                                             "Combining Marks for Symbols",
  925                                                                             "CombiningMarksforSymbols" });
  926   
  927           /**
  928            * Constant for the "Letterlike Symbols" Unicode character block.
  929            * @since 1.2
  930            */
  931           public static final UnicodeBlock LETTERLIKE_SYMBOLS =
  932               new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
  933   
  934           /**
  935            * Constant for the "Number Forms" Unicode character block.
  936            * @since 1.2
  937            */
  938           public static final UnicodeBlock NUMBER_FORMS =
  939               new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
  940   
  941           /**
  942            * Constant for the "Arrows" Unicode character block.
  943            * @since 1.2
  944            */
  945           public static final UnicodeBlock ARROWS =
  946               new UnicodeBlock("ARROWS");
  947   
  948           /**
  949            * Constant for the "Mathematical Operators" Unicode character block.
  950            * @since 1.2
  951            */
  952           public static final UnicodeBlock MATHEMATICAL_OPERATORS =
  953               new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
  954                                                                        "MathematicalOperators"});
  955   
  956           /**
  957            * Constant for the "Miscellaneous Technical" Unicode character block.
  958            * @since 1.2
  959            */
  960           public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
  961               new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
  962                                                                         "MiscellaneousTechnical"});
  963   
  964           /**
  965            * Constant for the "Control Pictures" Unicode character block.
  966            * @since 1.2
  967            */
  968           public static final UnicodeBlock CONTROL_PICTURES =
  969               new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
  970   
  971           /**
  972            * Constant for the "Optical Character Recognition" Unicode character block.
  973            * @since 1.2
  974            */
  975           public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
  976               new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
  977                                                                               "OpticalCharacterRecognition"});
  978   
  979           /**
  980            * Constant for the "Enclosed Alphanumerics" Unicode character block.
  981            * @since 1.2
  982            */
  983           public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
  984               new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
  985                                                                        "EnclosedAlphanumerics"});
  986   
  987           /**
  988            * Constant for the "Box Drawing" Unicode character block.
  989            * @since 1.2
  990            */
  991           public static final UnicodeBlock BOX_DRAWING =
  992               new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
  993   
  994           /**
  995            * Constant for the "Block Elements" Unicode character block.
  996            * @since 1.2
  997            */
  998           public static final UnicodeBlock BLOCK_ELEMENTS =
  999               new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
 1000   
 1001           /**
 1002            * Constant for the "Geometric Shapes" Unicode character block.
 1003            * @since 1.2
 1004            */
 1005           public static final UnicodeBlock GEOMETRIC_SHAPES =
 1006               new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
 1007   
 1008           /**
 1009            * Constant for the "Miscellaneous Symbols" Unicode character block.
 1010            * @since 1.2
 1011            */
 1012           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
 1013               new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
 1014                                                                       "MiscellaneousSymbols"});
 1015   
 1016           /**
 1017            * Constant for the "Dingbats" Unicode character block.
 1018            * @since 1.2
 1019            */
 1020           public static final UnicodeBlock DINGBATS =
 1021               new UnicodeBlock("DINGBATS");
 1022   
 1023           /**
 1024            * Constant for the "CJK Symbols and Punctuation" Unicode character block.
 1025            * @since 1.2
 1026            */
 1027           public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
 1028               new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
 1029                                                                             "CJKSymbolsandPunctuation"});
 1030   
 1031           /**
 1032            * Constant for the "Hiragana" Unicode character block.
 1033            * @since 1.2
 1034            */
 1035           public static final UnicodeBlock HIRAGANA =
 1036               new UnicodeBlock("HIRAGANA");
 1037   
 1038           /**
 1039            * Constant for the "Katakana" Unicode character block.
 1040            * @since 1.2
 1041            */
 1042           public static final UnicodeBlock KATAKANA =
 1043               new UnicodeBlock("KATAKANA");
 1044   
 1045           /**
 1046            * Constant for the "Bopomofo" Unicode character block.
 1047            * @since 1.2
 1048            */
 1049           public static final UnicodeBlock BOPOMOFO =
 1050               new UnicodeBlock("BOPOMOFO");
 1051   
 1052           /**
 1053            * Constant for the "Hangul Compatibility Jamo" Unicode character block.
 1054            * @since 1.2
 1055            */
 1056           public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
 1057               new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
 1058                                                                           "HangulCompatibilityJamo"});
 1059   
 1060           /**
 1061            * Constant for the "Kanbun" Unicode character block.
 1062            * @since 1.2
 1063            */
 1064           public static final UnicodeBlock KANBUN =
 1065               new UnicodeBlock("KANBUN");
 1066   
 1067           /**
 1068            * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
 1069            * @since 1.2
 1070            */
 1071           public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
 1072               new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
 1073                                                                                 "EnclosedCJKLettersandMonths"});
 1074   
 1075           /**
 1076            * Constant for the "CJK Compatibility" Unicode character block.
 1077            * @since 1.2
 1078            */
 1079           public static final UnicodeBlock CJK_COMPATIBILITY =
 1080               new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
 1081   
 1082           /**
 1083            * Constant for the "CJK Unified Ideographs" Unicode character block.
 1084            * @since 1.2
 1085            */
 1086           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
 1087               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
 1088                                                                        "CJKUnifiedIdeographs"});
 1089   
 1090           /**
 1091            * Constant for the "Hangul Syllables" Unicode character block.
 1092            * @since 1.2
 1093            */
 1094           public static final UnicodeBlock HANGUL_SYLLABLES =
 1095               new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
 1096   
 1097           /**
 1098            * Constant for the "Private Use Area" Unicode character block.
 1099            * @since 1.2
 1100            */
 1101           public static final UnicodeBlock PRIVATE_USE_AREA =
 1102               new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
 1103   
 1104           /**
 1105            * Constant for the "CJK Compatibility Ideographs" Unicode character block.
 1106            * @since 1.2
 1107            */
 1108           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
 1109               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
 1110                                new String[] {"CJK Compatibility Ideographs",
 1111                                              "CJKCompatibilityIdeographs"});
 1112   
 1113           /**
 1114            * Constant for the "Alphabetic Presentation Forms" Unicode character block.
 1115            * @since 1.2
 1116            */
 1117           public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
 1118               new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
 1119                                                                               "AlphabeticPresentationForms"});
 1120   
 1121           /**
 1122            * Constant for the "Arabic Presentation Forms-A" Unicode character block.
 1123            * @since 1.2
 1124            */
 1125           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
 1126               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
 1127                                                                             "ArabicPresentationForms-A"});
 1128   
 1129           /**
 1130            * Constant for the "Combining Half Marks" Unicode character block.
 1131            * @since 1.2
 1132            */
 1133           public static final UnicodeBlock COMBINING_HALF_MARKS =
 1134               new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
 1135                                                                      "CombiningHalfMarks"});
 1136   
 1137           /**
 1138            * Constant for the "CJK Compatibility Forms" Unicode character block.
 1139            * @since 1.2
 1140            */
 1141           public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
 1142               new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
 1143                                                                         "CJKCompatibilityForms"});
 1144   
 1145           /**
 1146            * Constant for the "Small Form Variants" Unicode character block.
 1147            * @since 1.2
 1148            */
 1149           public static final UnicodeBlock SMALL_FORM_VARIANTS =
 1150               new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
 1151                                                                     "SmallFormVariants"});
 1152   
 1153           /**
 1154            * Constant for the "Arabic Presentation Forms-B" Unicode character block.
 1155            * @since 1.2
 1156            */
 1157           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
 1158               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
 1159                                                                             "ArabicPresentationForms-B"});
 1160   
 1161           /**
 1162            * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
 1163            * @since 1.2
 1164            */
 1165           public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
 1166               new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
 1167                                new String[] {"Halfwidth and Fullwidth Forms",
 1168                                              "HalfwidthandFullwidthForms"});
 1169   
 1170           /**
 1171            * Constant for the "Specials" Unicode character block.
 1172            * @since 1.2
 1173            */
 1174           public static final UnicodeBlock SPECIALS =
 1175               new UnicodeBlock("SPECIALS");
 1176   
 1177           /**
                 * Legacy constant for the surrogates area, still declared alongside
                 * the three block constants that replace it.
                 *
 1178            * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
 1179            *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
 1180            *             {@link #LOW_SURROGATES}. These new constants match
 1181            *             the block definitions of the Unicode Standard.
 1182            *             The {@link #of(char)} and {@link #of(int)} methods
 1183            *             return the new constants, not SURROGATES_AREA.
 1184            */
 1185           @Deprecated
 1186           public static final UnicodeBlock SURROGATES_AREA =
 1187               new UnicodeBlock("SURROGATES_AREA");
 1188   
 1189           /**
 1190            * Constant for the "Syriac" Unicode character block.
 1191            * @since 1.4
 1192            */
 1193           public static final UnicodeBlock SYRIAC =
 1194               new UnicodeBlock("SYRIAC");
 1195   
 1196           /**
 1197            * Constant for the "Thaana" Unicode character block.
 1198            * @since 1.4
 1199            */
 1200           public static final UnicodeBlock THAANA =
 1201               new UnicodeBlock("THAANA");
 1202   
 1203           /**
 1204            * Constant for the "Sinhala" Unicode character block.
 1205            * @since 1.4
 1206            */
 1207           public static final UnicodeBlock SINHALA =
 1208               new UnicodeBlock("SINHALA");
 1209   
 1210           /**
 1211            * Constant for the "Myanmar" Unicode character block.
 1212            * @since 1.4
 1213            */
 1214           public static final UnicodeBlock MYANMAR =
 1215               new UnicodeBlock("MYANMAR");
 1216   
 1217           /**
 1218            * Constant for the "Ethiopic" Unicode character block.
 1219            * @since 1.4
 1220            */
 1221           public static final UnicodeBlock ETHIOPIC =
 1222               new UnicodeBlock("ETHIOPIC");
 1223   
 1224           /**
 1225            * Constant for the "Cherokee" Unicode character block.
 1226            * @since 1.4
 1227            */
 1228           public static final UnicodeBlock CHEROKEE =
 1229               new UnicodeBlock("CHEROKEE");
 1230   
 1231           /**
 1232            * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
 1233            * @since 1.4
 1234            */
 1235           public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
 1236               new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
 1237                                new String[] {"Unified Canadian Aboriginal Syllabics",
 1238                                              "UnifiedCanadianAboriginalSyllabics"});
 1239   
 1240           /**
 1241            * Constant for the "Ogham" Unicode character block.
 1242            * @since 1.4
 1243            */
 1244           public static final UnicodeBlock OGHAM =
 1245                                new UnicodeBlock("OGHAM");
 1246   
 1247           /**
 1248            * Constant for the "Runic" Unicode character block.
 1249            * @since 1.4
 1250            */
 1251           public static final UnicodeBlock RUNIC =
 1252                                new UnicodeBlock("RUNIC");
 1253   
 1254           /**
 1255            * Constant for the "Khmer" Unicode character block.
 1256            * @since 1.4
 1257            */
 1258           public static final UnicodeBlock KHMER =
 1259                                new UnicodeBlock("KHMER");
 1260   
 1261           /**
 1262            * Constant for the "Mongolian" Unicode character block.
 1263            * @since 1.4
 1264            */
 1265           public static final UnicodeBlock MONGOLIAN =
 1266                                new UnicodeBlock("MONGOLIAN");
 1267   
 1268           /**
 1269            * Constant for the "Braille Patterns" Unicode character block.
 1270            * @since 1.4
 1271            */
 1272           public static final UnicodeBlock BRAILLE_PATTERNS =
 1273               new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
 1274                                                                  "BraillePatterns"});
 1275   
 1276           /**
 1277            * Constant for the "CJK Radicals Supplement" Unicode character block.
 1278            * @since 1.4
 1279            */
 1280           public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
 1281                new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
 1282                                                                          "CJKRadicalsSupplement"});
 1283   
 1284           /**
 1285            * Constant for the "Kangxi Radicals" Unicode character block.
 1286            * @since 1.4
 1287            */
 1288           public static final UnicodeBlock KANGXI_RADICALS =
 1289               new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
 1290   
 1291           /**
 1292            * Constant for the "Ideographic Description Characters" Unicode character block.
 1293            * @since 1.4
 1294            */
 1295           public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
 1296               new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
 1297                                                                                    "IdeographicDescriptionCharacters"});
 1298   
 1299           /**
 1300            * Constant for the "Bopomofo Extended" Unicode character block.
 1301            * @since 1.4
 1302            */
 1303           public static final UnicodeBlock BOPOMOFO_EXTENDED =
 1304               new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
 1305                                                                   "BopomofoExtended"});
 1306   
 1307           /**
 1308            * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
 1309            * @since 1.4
 1310            */
 1311           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
 1312               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
 1313                                                                                    "CJKUnifiedIdeographsExtensionA"});
 1314   
 1315           /**
 1316            * Constant for the "Yi Syllables" Unicode character block.
 1317            * @since 1.4
 1318            */
 1319           public static final UnicodeBlock YI_SYLLABLES =
 1320               new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
 1321   
 1322           /**
 1323            * Constant for the "Yi Radicals" Unicode character block.
 1324            * @since 1.4
 1325            */
 1326           public static final UnicodeBlock YI_RADICALS =
 1327               new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
 1328   
 1329   
 1330           /**
 1331            * Constant for the "Cyrillic Supplementary" Unicode character block.
 1332            * @since 1.5
 1333            */
 1334           public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
 1335               new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", new String[] {"Cyrillic Supplementary",
 1336                                                                        "CyrillicSupplementary"});
 1337   
 1338           /**
 1339            * Constant for the "Tagalog" Unicode character block.
 1340            * @since 1.5
 1341            */
 1342           public static final UnicodeBlock TAGALOG =
 1343               new UnicodeBlock("TAGALOG");
 1344   
 1345           /**
 1346            * Constant for the "Hanunoo" Unicode character block.
 1347            * @since 1.5
 1348            */
 1349           public static final UnicodeBlock HANUNOO =
 1350               new UnicodeBlock("HANUNOO");
 1351   
 1352           /**
 1353            * Constant for the "Buhid" Unicode character block.
 1354            * @since 1.5
 1355            */
 1356           public static final UnicodeBlock BUHID =
 1357               new UnicodeBlock("BUHID");
 1358   
 1359           /**
 1360            * Constant for the "Tagbanwa" Unicode character block.
 1361            * @since 1.5
 1362            */
 1363           public static final UnicodeBlock TAGBANWA =
 1364               new UnicodeBlock("TAGBANWA");
 1365   
 1366           /**
 1367            * Constant for the "Limbu" Unicode character block.
 1368            * @since 1.5
 1369            */
 1370           public static final UnicodeBlock LIMBU =
 1371               new UnicodeBlock("LIMBU");
 1372   
 1373           /**
 1374            * Constant for the "Tai Le" Unicode character block.
 1375            * @since 1.5
 1376            */
 1377           public static final UnicodeBlock TAI_LE =
 1378               new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
 1379   
 1380           /**
 1381            * Constant for the "Khmer Symbols" Unicode character block.
 1382            * @since 1.5
 1383            */
 1384           public static final UnicodeBlock KHMER_SYMBOLS =
 1385               new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
 1386   
 1387           /**
 1388            * Constant for the "Phonetic Extensions" Unicode character block.
 1389            * @since 1.5
 1390            */
 1391           public static final UnicodeBlock PHONETIC_EXTENSIONS =
 1392               new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
 1393   
 1394           /**
 1395            * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
 1396            * @since 1.5
 1397            */
 1398           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
 1399               new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
 1400                                new String[]{"Miscellaneous Mathematical Symbols-A",
 1401                                             "MiscellaneousMathematicalSymbols-A"});
 1402   
 1403           /**
 1404            * Constant for the "Supplemental Arrows-A" Unicode character block.
 1405            * @since 1.5
 1406            */
 1407           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
 1408               new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
 1409                                                                       "SupplementalArrows-A"});
 1410   
 1411           /**
 1412            * Constant for the "Supplemental Arrows-B" Unicode character block.
 1413            * @since 1.5
 1414            */
 1415           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
 1416               new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
 1417                                                                       "SupplementalArrows-B"});
 1418   
 1419           /**
 1420            * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
 1421            * @since 1.5
 1422            */
 1423           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 1424                   = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
 1425                                      new String[] {"Miscellaneous Mathematical Symbols-B",
 1426                                                    "MiscellaneousMathematicalSymbols-B"});
 1427   
 1428           /**
 1429            * Constant for the "Supplemental Mathematical Operators" Unicode character block (0x2A00 through 0x2AFF).
 1430            * @since 1.5
 1431            */
 1432           public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
 1433               new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
 1434                                new String[]{"Supplemental Mathematical Operators",
 1435                                             "SupplementalMathematicalOperators"} );
 1436   
 1437           /**
 1438            * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block (0x2B00 through 0x2BFF).
 1439            * @since 1.5
 1440            */
 1441           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
 1442               new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
 1443                                                                                  "MiscellaneousSymbolsandArrows"});
 1444   
 1445           /**
 1446            * Constant for the "Katakana Phonetic Extensions" Unicode character block (0x31F0 through 0x31FF).
 1447            * @since 1.5
 1448            */
 1449           public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
 1450               new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
 1451                                                                              "KatakanaPhoneticExtensions"});
 1452   
 1453           /**
 1454            * Constant for the "Yijing Hexagram Symbols" Unicode character block (0x4DC0 through 0x4DFF).
 1455            * @since 1.5
 1456            */
 1457           public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
 1458               new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
 1459                                                                         "YijingHexagramSymbols"});
 1460   
 1461           /**
 1462            * Constant for the "Variation Selectors" Unicode character block (0xFE00 through 0xFE0F).
 1463            * @since 1.5
 1464            */
 1465           public static final UnicodeBlock VARIATION_SELECTORS =
 1466               new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
 1467   
 1468           /**
 1469            * Constant for the "Linear B Syllabary" Unicode character block (0x10000 through 0x1007F).
 1470            * @since 1.5
 1471            */
 1472           public static final UnicodeBlock LINEAR_B_SYLLABARY =
 1473               new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
 1474   
 1475           /**
 1476            * Constant for the "Linear B Ideograms" Unicode character block (0x10080 through 0x100FF).
 1477            * @since 1.5
 1478            */
 1479           public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
 1480               new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
 1481   
 1482           /**
 1483            * Constant for the "Aegean Numbers" Unicode character block (0x10100 through 0x1013F).
 1484            * @since 1.5
 1485            */
 1486           public static final UnicodeBlock AEGEAN_NUMBERS =
 1487               new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
 1488   
 1489           /**
 1490            * Constant for the "Old Italic" Unicode character block (0x10300 through 0x1032F).
 1491            * @since 1.5
 1492            */
 1493           public static final UnicodeBlock OLD_ITALIC =
 1494               new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
 1495   
 1496           /**
 1497            * Constant for the "Gothic" Unicode character block (0x10330 through 0x1034F).
 1498            * @since 1.5
 1499            */
 1500           public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
 1501   
 1502           /**
 1503            * Constant for the "Ugaritic" Unicode character block (0x10380 through 0x1039F).
 1504            * @since 1.5
 1505            */
 1506           public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
 1507   
 1508           /**
 1509            * Constant for the "Deseret" Unicode character block (0x10400 through 0x1044F).
 1510            * @since 1.5
 1511            */
 1512           public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
 1513   
 1514           /**
 1515            * Constant for the "Shavian" Unicode character block (0x10450 through 0x1047F).
 1516            * @since 1.5
 1517            */
 1518           public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
 1519   
 1520           /**
 1521            * Constant for the "Osmanya" Unicode character block (0x10480 through 0x104AF).
 1522            * @since 1.5
 1523            */
 1524           public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
 1525   
 1526           /**
 1527            * Constant for the "Cypriot Syllabary" Unicode character block (0x10800 through 0x1083F).
 1528            * @since 1.5
 1529            */
 1530           public static final UnicodeBlock CYPRIOT_SYLLABARY =
 1531               new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
 1532   
 1533           /**
 1534            * Constant for the "Byzantine Musical Symbols" Unicode character block (0x1D000 through 0x1D0FF).
 1535            * @since 1.5
 1536            */
 1537           public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
 1538               new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
 1539                                                                           "ByzantineMusicalSymbols"});
 1540   
 1541           /**
 1542            * Constant for the "Musical Symbols" Unicode character block (0x1D100 through 0x1D1FF).
 1543            * @since 1.5
 1544            */
 1545           public static final UnicodeBlock MUSICAL_SYMBOLS =
 1546               new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
 1547   
 1548           /**
 1549            * Constant for the "Tai Xuan Jing Symbols" Unicode character block (0x1D300 through 0x1D35F).
 1550            * @since 1.5
 1551            */
 1552           public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
 1553               new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
 1554                                                                        "TaiXuanJingSymbols"});
 1555   
 1556           /**
 1557            * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block (0x1D400 through 0x1D7FF).
 1558            * @since 1.5
 1559            */
 1560           public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
 1561               new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
 1562                                new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
 1563   
 1564           /**
 1565            * Constant for the "CJK Unified Ideographs Extension B" Unicode character block (0x20000 through 0x2A6DF).
 1566            * @since 1.5
 1567            */
 1568           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
 1569               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
 1570                                new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
 1571   
 1572           /**
 1573            * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block (0x2F800 through 0x2FA1F).
 1574            * @since 1.5
 1575            */
 1576           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
 1577               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
 1578                                new String[]{"CJK Compatibility Ideographs Supplement",
 1579                                             "CJKCompatibilityIdeographsSupplement"});
 1580   
 1581           /**
 1582            * Constant for the "Tags" Unicode character block (0xE0000 through 0xE007F).
 1583            * @since 1.5
 1584            */
 1585           public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
 1586   
 1587           /**
 1588            * Constant for the "Variation Selectors Supplement" Unicode character block (0xE0100 through 0xE01EF).
 1589            * @since 1.5
 1590            */
 1591           public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
 1592               new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
 1593                                                                                "VariationSelectorsSupplement"});
 1594   
 1595           /**
 1596            * Constant for the "Supplementary Private Use Area-A" Unicode character block (0xF0000 through 0xFFFFF).
 1597            * @since 1.5
 1598            */
 1599           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
 1600               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
 1601                                new String[] {"Supplementary Private Use Area-A",
 1602                                              "SupplementaryPrivateUseArea-A"});
 1603   
 1604           /**
 1605            * Constant for the "Supplementary Private Use Area-B" Unicode character block (0x100000 through 0x10FFFF).
 1606            * @since 1.5
 1607            */
 1608           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
 1609               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
 1610                                new String[] {"Supplementary Private Use Area-B",
 1611                                              "SupplementaryPrivateUseArea-B"});
 1612   
 1613           /**
 1614            * Constant for the "High Surrogates" Unicode character block.
 1615            * This block represents code point values in the high surrogate
 1616            * range: 0xD800 through 0xDB7F
 1617            *
 1618            * @since 1.5
 1619            */
 1620           public static final UnicodeBlock HIGH_SURROGATES =
 1621               new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
 1622   
 1623           /**
 1624            * Constant for the "High Private Use Surrogates" Unicode character block.
 1625            * This block represents code point values in the high surrogate
 1626            * range: 0xDB80 through 0xDBFF
 1627            *
 1628            * @since 1.5
 1629            */
 1630           public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
 1631               new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
 1632                                                                              "HighPrivateUseSurrogates"});
 1633   
 1634           /**
 1635            * Constant for the "Low Surrogates" Unicode character block.
 1636            * This block represents code point values in the low surrogate
 1637            * range: 0xDC00 through 0xDFFF
 1638            *
 1639            * @since 1.5
 1640            */
 1641           public static final UnicodeBlock LOW_SURROGATES =
 1642               new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
 1643   
                /*
                 * Ascending table of range start code points, searched by of(int).
                 * Entry i is the first code point of a range that extends up to, but
                 * not including, entry i + 1; the last range has no upper neighbour.
                 * The block for range i is blocks[i], so the two arrays must be kept
                 * index-aligned. The trailing comment on each entry names the block,
                 * or says "unassigned" where blocks holds null.
                 */
 1644           private static final int blockStarts[] = {
 1645               0x0000, // Basic Latin
 1646               0x0080, // Latin-1 Supplement
 1647               0x0100, // Latin Extended-A
 1648               0x0180, // Latin Extended-B
 1649               0x0250, // IPA Extensions
 1650               0x02B0, // Spacing Modifier Letters
 1651               0x0300, // Combining Diacritical Marks
 1652               0x0370, // Greek and Coptic
 1653               0x0400, // Cyrillic
 1654               0x0500, // Cyrillic Supplementary
 1655               0x0530, // Armenian
 1656               0x0590, // Hebrew
 1657               0x0600, // Arabic
 1658               0x0700, // Syriac
 1659               0x0750, // unassigned
 1660               0x0780, // Thaana
 1661               0x07C0, // unassigned
 1662               0x0900, // Devanagari
 1663               0x0980, // Bengali
 1664               0x0A00, // Gurmukhi
 1665               0x0A80, // Gujarati
 1666               0x0B00, // Oriya
 1667               0x0B80, // Tamil
 1668               0x0C00, // Telugu
 1669               0x0C80, // Kannada
 1670               0x0D00, // Malayalam
 1671               0x0D80, // Sinhala
 1672               0x0E00, // Thai
 1673               0x0E80, // Lao
 1674               0x0F00, // Tibetan
 1675               0x1000, // Myanmar
 1676               0x10A0, // Georgian
 1677               0x1100, // Hangul Jamo
 1678               0x1200, // Ethiopic
 1679               0x1380, // unassigned
 1680               0x13A0, // Cherokee
 1681               0x1400, // Unified Canadian Aboriginal Syllabics
 1682               0x1680, // Ogham
 1683               0x16A0, // Runic
 1684               0x1700, // Tagalog
 1685               0x1720, // Hanunoo
 1686               0x1740, // Buhid
 1687               0x1760, // Tagbanwa
 1688               0x1780, // Khmer
 1689               0x1800, // Mongolian
 1690               0x18B0, // unassigned
 1691               0x1900, // Limbu
 1692               0x1950, // Tai Le
 1693               0x1980, // unassigned
 1694               0x19E0, // Khmer Symbols
 1695               0x1A00, // unassigned
 1696               0x1D00, // Phonetic Extensions
 1697               0x1D80, // unassigned
 1698               0x1E00, // Latin Extended Additional
 1699               0x1F00, // Greek Extended
 1700               0x2000, // General Punctuation
 1701               0x2070, // Superscripts and Subscripts
 1702               0x20A0, // Currency Symbols
 1703               0x20D0, // Combining Diacritical Marks for Symbols
 1704               0x2100, // Letterlike Symbols
 1705               0x2150, // Number Forms
 1706               0x2190, // Arrows
 1707               0x2200, // Mathematical Operators
 1708               0x2300, // Miscellaneous Technical
 1709               0x2400, // Control Pictures
 1710               0x2440, // Optical Character Recognition
 1711               0x2460, // Enclosed Alphanumerics
 1712               0x2500, // Box Drawing
 1713               0x2580, // Block Elements
 1714               0x25A0, // Geometric Shapes
 1715               0x2600, // Miscellaneous Symbols
 1716               0x2700, // Dingbats
 1717               0x27C0, // Miscellaneous Mathematical Symbols-A
 1718               0x27F0, // Supplemental Arrows-A
 1719               0x2800, // Braille Patterns
 1720               0x2900, // Supplemental Arrows-B
 1721               0x2980, // Miscellaneous Mathematical Symbols-B
 1722               0x2A00, // Supplemental Mathematical Operators
 1723               0x2B00, // Miscellaneous Symbols and Arrows
 1724               0x2C00, // unassigned
 1725               0x2E80, // CJK Radicals Supplement
 1726               0x2F00, // Kangxi Radicals
 1727               0x2FE0, // unassigned
 1728               0x2FF0, // Ideographic Description Characters
 1729               0x3000, // CJK Symbols and Punctuation
 1730               0x3040, // Hiragana
 1731               0x30A0, // Katakana
 1732               0x3100, // Bopomofo
 1733               0x3130, // Hangul Compatibility Jamo
 1734               0x3190, // Kanbun
 1735               0x31A0, // Bopomofo Extended
 1736               0x31C0, // unassigned
 1737               0x31F0, // Katakana Phonetic Extensions
 1738               0x3200, // Enclosed CJK Letters and Months
 1739               0x3300, // CJK Compatibility
 1740               0x3400, // CJK Unified Ideographs Extension A
 1741               0x4DC0, // Yijing Hexagram Symbols
 1742               0x4E00, // CJK Unified Ideographs
 1743               0xA000, // Yi Syllables
 1744               0xA490, // Yi Radicals
 1745               0xA4D0, // unassigned
 1746               0xAC00, // Hangul Syllables
 1747               0xD7B0, // unassigned
 1748               0xD800, // High Surrogates
 1749               0xDB80, // High Private Use Surrogates
 1750               0xDC00, // Low Surrogates
 1751               0xE000, // Private Use
 1752               0xF900, // CJK Compatibility Ideographs
 1753               0xFB00, // Alphabetic Presentation Forms
 1754               0xFB50, // Arabic Presentation Forms-A
 1755               0xFE00, // Variation Selectors
 1756               0xFE10, // unassigned
 1757               0xFE20, // Combining Half Marks
 1758               0xFE30, // CJK Compatibility Forms
 1759               0xFE50, // Small Form Variants
 1760               0xFE70, // Arabic Presentation Forms-B
 1761               0xFF00, // Halfwidth and Fullwidth Forms
 1762               0xFFF0, // Specials
 1763               0x10000, // Linear B Syllabary
 1764               0x10080, // Linear B Ideograms
 1765               0x10100, // Aegean Numbers
 1766               0x10140, // unassigned
 1767               0x10300, // Old Italic
 1768               0x10330, // Gothic
 1769               0x10350, // unassigned
 1770               0x10380, // Ugaritic
 1771               0x103A0, // unassigned
 1772               0x10400, // Deseret
 1773               0x10450, // Shavian
 1774               0x10480, // Osmanya
 1775               0x104B0, // unassigned
 1776               0x10800, // Cypriot Syllabary
 1777               0x10840, // unassigned
 1778               0x1D000, // Byzantine Musical Symbols
 1779               0x1D100, // Musical Symbols
 1780               0x1D200, // unassigned
 1781               0x1D300, // Tai Xuan Jing Symbols
 1782               0x1D360, // unassigned
 1783               0x1D400, // Mathematical Alphanumeric Symbols
 1784               0x1D800, // unassigned
 1785               0x20000, // CJK Unified Ideographs Extension B
 1786               0x2A6E0, // unassigned
 1787               0x2F800, // CJK Compatibility Ideographs Supplement
 1788               0x2FA20, // unassigned
 1789               0xE0000, // Tags
 1790               0xE0080, // unassigned
 1791               0xE0100, // Variation Selectors Supplement
 1792               0xE01F0, // unassigned
 1793               0xF0000, // Supplementary Private Use Area-A
 1794               0x100000, // Supplementary Private Use Area-B
 1795           };
 1796   
                /*
                 * The block for each range listed in blockStarts, at the same index.
                 * A null entry marks a range that belongs to no block; of(int) returns
                 * that null unchanged. Any edit here must keep the two arrays the same
                 * length and in the same order.
                 */
 1797           private static final UnicodeBlock[] blocks = {
 1798               BASIC_LATIN,
 1799               LATIN_1_SUPPLEMENT,
 1800               LATIN_EXTENDED_A,
 1801               LATIN_EXTENDED_B,
 1802               IPA_EXTENSIONS,
 1803               SPACING_MODIFIER_LETTERS,
 1804               COMBINING_DIACRITICAL_MARKS,
 1805               GREEK,
 1806               CYRILLIC,
 1807               CYRILLIC_SUPPLEMENTARY,
 1808               ARMENIAN,
 1809               HEBREW,
 1810               ARABIC,
 1811               SYRIAC,
 1812               null,
 1813               THAANA,
 1814               null,
 1815               DEVANAGARI,
 1816               BENGALI,
 1817               GURMUKHI,
 1818               GUJARATI,
 1819               ORIYA,
 1820               TAMIL,
 1821               TELUGU,
 1822               KANNADA,
 1823               MALAYALAM,
 1824               SINHALA,
 1825               THAI,
 1826               LAO,
 1827               TIBETAN,
 1828               MYANMAR,
 1829               GEORGIAN,
 1830               HANGUL_JAMO,
 1831               ETHIOPIC,
 1832               null,
 1833               CHEROKEE,
 1834               UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 1835               OGHAM,
 1836               RUNIC,
 1837               TAGALOG,
 1838               HANUNOO,
 1839               BUHID,
 1840               TAGBANWA,
 1841               KHMER,
 1842               MONGOLIAN,
 1843               null,
 1844               LIMBU,
 1845               TAI_LE,
 1846               null,
 1847               KHMER_SYMBOLS,
 1848               null,
 1849               PHONETIC_EXTENSIONS,
 1850               null,
 1851               LATIN_EXTENDED_ADDITIONAL,
 1852               GREEK_EXTENDED,
 1853               GENERAL_PUNCTUATION,
 1854               SUPERSCRIPTS_AND_SUBSCRIPTS,
 1855               CURRENCY_SYMBOLS,
 1856               COMBINING_MARKS_FOR_SYMBOLS,
 1857               LETTERLIKE_SYMBOLS,
 1858               NUMBER_FORMS,
 1859               ARROWS,
 1860               MATHEMATICAL_OPERATORS,
 1861               MISCELLANEOUS_TECHNICAL,
 1862               CONTROL_PICTURES,
 1863               OPTICAL_CHARACTER_RECOGNITION,
 1864               ENCLOSED_ALPHANUMERICS,
 1865               BOX_DRAWING,
 1866               BLOCK_ELEMENTS,
 1867               GEOMETRIC_SHAPES,
 1868               MISCELLANEOUS_SYMBOLS,
 1869               DINGBATS,
 1870               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
 1871               SUPPLEMENTAL_ARROWS_A,
 1872               BRAILLE_PATTERNS,
 1873               SUPPLEMENTAL_ARROWS_B,
 1874               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
 1875               SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
 1876               MISCELLANEOUS_SYMBOLS_AND_ARROWS,
 1877               null,
 1878               CJK_RADICALS_SUPPLEMENT,
 1879               KANGXI_RADICALS,
 1880               null,
 1881               IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 1882               CJK_SYMBOLS_AND_PUNCTUATION,
 1883               HIRAGANA,
 1884               KATAKANA,
 1885               BOPOMOFO,
 1886               HANGUL_COMPATIBILITY_JAMO,
 1887               KANBUN,
 1888               BOPOMOFO_EXTENDED,
 1889               null,
 1890               KATAKANA_PHONETIC_EXTENSIONS,
 1891               ENCLOSED_CJK_LETTERS_AND_MONTHS,
 1892               CJK_COMPATIBILITY,
 1893               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 1894               YIJING_HEXAGRAM_SYMBOLS,
 1895               CJK_UNIFIED_IDEOGRAPHS,
 1896               YI_SYLLABLES,
 1897               YI_RADICALS,
 1898               null,
 1899               HANGUL_SYLLABLES,
 1900               null,
 1901               HIGH_SURROGATES,
 1902               HIGH_PRIVATE_USE_SURROGATES,
 1903               LOW_SURROGATES,
 1904               PRIVATE_USE_AREA,
 1905               CJK_COMPATIBILITY_IDEOGRAPHS,
 1906               ALPHABETIC_PRESENTATION_FORMS,
 1907               ARABIC_PRESENTATION_FORMS_A,
 1908               VARIATION_SELECTORS,
 1909               null,
 1910               COMBINING_HALF_MARKS,
 1911               CJK_COMPATIBILITY_FORMS,
 1912               SMALL_FORM_VARIANTS,
 1913               ARABIC_PRESENTATION_FORMS_B,
 1914               HALFWIDTH_AND_FULLWIDTH_FORMS,
 1915               SPECIALS,
 1916               LINEAR_B_SYLLABARY,
 1917               LINEAR_B_IDEOGRAMS,
 1918               AEGEAN_NUMBERS,
 1919               null,
 1920               OLD_ITALIC,
 1921               GOTHIC,
 1922               null,
 1923               UGARITIC,
 1924               null,
 1925               DESERET,
 1926               SHAVIAN,
 1927               OSMANYA,
 1928               null,
 1929               CYPRIOT_SYLLABARY,
 1930               null,
 1931               BYZANTINE_MUSICAL_SYMBOLS,
 1932               MUSICAL_SYMBOLS,
 1933               null,
 1934               TAI_XUAN_JING_SYMBOLS,
 1935               null,
 1936               MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
 1937               null,
 1938               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
 1939               null,
 1940               CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
 1941               null,
 1942               TAGS,
 1943               null,
 1944               VARIATION_SELECTORS_SUPPLEMENT,
 1945               null,
 1946               SUPPLEMENTARY_PRIVATE_USE_AREA_A,
 1947               SUPPLEMENTARY_PRIVATE_USE_AREA_B
 1948           };
 1949   
 1950   
 1951           /**
 1952            * Returns the object representing the Unicode block containing the
 1953            * given character, or <code>null</code> if the character is not a
 1954            * member of a defined block.
 1955            *
 1956                    * <p><b>Note:</b> This method cannot handle <a
 1957                    * href="Character.html#supplementary"> supplementary
 1958                    * characters</a>. To support all Unicode characters,
 1959                    * including supplementary characters, use the {@link
 1960                    * #of(int)} method.
 1961            *
 1962            * @param   c  The character in question
 1963            * @return  The <code>UnicodeBlock</code> instance representing the
 1964            *          Unicode block of which this character is a member, or
 1965            *          <code>null</code> if the character is not a member of any
 1966            *          Unicode block
 1967            */
 1968           public static UnicodeBlock of(char c) {
 1969               return of((int)c);
 1970           }
 1971   
 1972   
 1973           /**
 1974            * Returns the object representing the Unicode block
 1975            * containing the given character (Unicode code point), or
 1976            * <code>null</code> if the character is not a member of a
 1977            * defined block.
 1978            *
 1979                    * @param   codePoint the character (Unicode code point) in question.
 1980            * @return  The <code>UnicodeBlock</code> instance representing the
 1981            *          Unicode block of which this character is a member, or
 1982            *          <code>null</code> if the character is not a member of any
 1983            *          Unicode block
 1984                    * @exception IllegalArgumentException if the specified
 1985                    * <code>codePoint</code> is an invalid Unicode code point.
 1986                    * @see Character#isValidCodePoint(int)
 1987                    * @since   1.5
 1988            */
 1989           public static UnicodeBlock of(int codePoint) {
 1990               if (!isValidCodePoint(codePoint)) {
 1991                   throw new IllegalArgumentException();
 1992               }
 1993   
 1994               int top, bottom, current;
 1995               bottom = 0;
 1996               top = blockStarts.length;
 1997               current = top/2;
 1998   
 1999               // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
 2000               while (top - bottom > 1) {
 2001                   if (codePoint >= blockStarts[current]) {
 2002                       bottom = current;
 2003                   } else {
 2004                       top = current;
 2005                   }
 2006                   current = (top + bottom) / 2;
 2007               }
 2008               return blocks[current];
 2009           }
 2010   
 2011           /**
 2012            * Returns the UnicodeBlock with the given name. Block
 2013            * names are determined by The Unicode Standard. The file
 2014            * Blocks-&lt;version&gt;.txt defines blocks for a particular
 2015            * version of the standard. The {@link Character} class specifies
 2016            * the version of the standard that it supports.
 2017            * <p>
 2018            * This method accepts block names in the following forms:
 2019            * <ol>
 2020            * <li> Canonical block names as defined by the Unicode Standard.
 2021            * For example, the standard defines a "Basic Latin" block. Therefore, this
 2022            * method accepts "Basic Latin" as a valid block name. The documentation of
 2023            * each UnicodeBlock provides the canonical name.
 2024            * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
 2025            * is a valid block name for the "Basic Latin" block.
 2026            * <li>The text representation of each constant UnicodeBlock identifier.
 2027            * For example, this method will return the {@link #BASIC_LATIN} block if
 2028            * provided with the "BASIC_LATIN" name. This form replaces all spaces and
 2029            *  hyphens in the canonical name with underscores.
 2030            * </ol>
 2031            * Finally, character case is ignored for all of the valid block name forms.
 2032            * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
 2033            * The en_US locale's case mapping rules are used to provide case-insensitive
 2034            * string comparisons for block name validation.
 2035            * <p>
 2036            * If the Unicode Standard changes block names, both the previous and
 2037            * current names will be accepted.
 2038            *
 2039            * @param blockName A <code>UnicodeBlock</code> name.
 2040            * @return The <code>UnicodeBlock</code> instance identified
 2041            *         by <code>blockName</code>
 2042            * @throws IllegalArgumentException if <code>blockName</code> is an
 2043            *         invalid name
 2044            * @throws NullPointerException if <code>blockName</code> is null
 2045            * @since 1.5
 2046            */
 2047           public static final UnicodeBlock forName(String blockName) {
 2048               UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
 2049               if (block == null) {
 2050                   throw new IllegalArgumentException();
 2051               }
 2052               return block;
 2053           }
 2054       }
 2055   
 2056   
 2057       /**
 2058        * The primitive <code>char</code> wrapped by this <code>Character</code>;
 2059        * assigned once by the constructor and never modified afterwards.
 2060        * @serial
 2061        */
 2062       private final char value;
 2063   
 2064       /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
 2065       private static final long serialVersionUID = 3786198910865385080L;
 2066   
 2067       /**
 2068        * Constructs a newly allocated <code>Character</code> object that
 2069        * represents the specified <code>char</code> value.
 2070        *
 2071        * @param  value   the value to be represented by the
 2072        *                  <code>Character</code> object.
 2073        */
 2074       public Character(char value) {
 2075           this.value = value;
 2076       }
 2077   
 2078       private static class CharacterCache {
 2079           private CharacterCache(){}
 2080   
 2081           static final Character cache[] = new Character[127 + 1];
 2082   
 2083           static {
 2084               for(int i = 0; i < cache.length; i++)
 2085                   cache[i] = new Character((char)i);
 2086           }
 2087       }
 2088   
 2089       /**
 2090        * Returns a <tt>Character</tt> instance representing the specified
 2091        * <tt>char</tt> value.
 2092        * If a new <tt>Character</tt> instance is not required, this method
 2093        * should generally be used in preference to the constructor
 2094        * {@link #Character(char)}, as this method is likely to yield
 2095        * significantly better space and time performance by caching
 2096        * frequently requested values.
 2097        *
 2098        * @param  c a char value.
 2099        * @return a <tt>Character</tt> instance representing <tt>c</tt>.
 2100        * @since  1.5
 2101        */
 2102       public static Character valueOf(char c) {
 2103           if(c <= 127) { // must cache
 2104               return CharacterCache.cache[(int)c];
 2105           }
 2106           return new Character(c);
 2107       }
 2108   
 2109       /**
 2110        * Returns the value of this <code>Character</code> object.
 2111        * @return  the primitive <code>char</code> value represented by
 2112        *          this object.
 2113        */
 2114       public char charValue() {
 2115           return value;
 2116       }
 2117   
 2118       /**
 2119        * Returns a hash code for this <code>Character</code>.
 2120        * @return  a hash code value for this object.
 2121        */
 2122       public int hashCode() {
 2123           return (int)value;
 2124       }
 2125   
 2126       /**
 2127        * Compares this object against the specified object.
 2128        * The result is <code>true</code> if and only if the argument is not
 2129        * <code>null</code> and is a <code>Character</code> object that
 2130        * represents the same <code>char</code> value as this object.
 2131        *
 2132        * @param   obj   the object to compare with.
 2133        * @return  <code>true</code> if the objects are the same;
 2134        *          <code>false</code> otherwise.
 2135        */
 2136       public boolean equals(Object obj) {
 2137           if (obj instanceof Character) {
 2138               return value == ((Character)obj).charValue();
 2139           }
 2140           return false;
 2141       }
 2142   
 2143       /**
 2144        * Returns a <code>String</code> object representing this
 2145        * <code>Character</code>'s value.  The result is a string of
 2146        * length 1 whose sole component is the primitive
 2147        * <code>char</code> value represented by this
 2148        * <code>Character</code> object.
 2149        *
 2150        * @return  a string representation of this object.
 2151        */
 2152       public String toString() {
 2153           char buf[] = {value};
 2154           return String.valueOf(buf);
 2155       }
 2156   
 2157       /**
 2158        * Returns a <code>String</code> object representing the
 2159        * specified <code>char</code>.  The result is a string of length
 2160        * 1 consisting solely of the specified <code>char</code>.
 2161        *
 2162        * @param c the <code>char</code> to be converted
 2163        * @return the string representation of the specified <code>char</code>
 2164        * @since 1.4
 2165        */
 2166       public static String toString(char c) {
 2167           return String.valueOf(c);
 2168       }
 2169   
 2170       /**
 2171        * Determines whether the specified code point is a valid Unicode
 2172        * code point value in the range of <code>0x0000</code> to
 2173        * <code>0x10FFFF</code> inclusive. This method is equivalent to
 2174        * the expression:
 2175        *
 2176        * <blockquote><pre>
 2177        * codePoint >= 0x0000 && codePoint <= 0x10FFFF
 2178        * </pre></blockquote>
 2179        *
 2180        * @param  codePoint the Unicode code point to be tested
 2181        * @return <code>true</code> if the specified code point value
 2182        * is a valid code point value;
 2183        * <code>false</code> otherwise.
 2184        * @since  1.5
 2185        */
 2186       public static boolean isValidCodePoint(int codePoint) {
 2187           return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
 2188       }
 2189   
 2190       /**
 2191        * Determines whether the specified character (Unicode code point)
 2192        * is in the supplementary character range. The method call is
 2193        * equivalent to the expression:
 2194        * <blockquote><pre>
 2195        * codePoint >= 0x10000 && codePoint <= 0x10FFFF
 2196        * </pre></blockquote>
 2197        *
 2198        * @param  codePoint the character (Unicode code point) to be tested
 2199        * @return <code>true</code> if the specified character is in the Unicode
 2200        *         supplementary character range; <code>false</code> otherwise.
 2201        * @since  1.5
 2202        */
 2203       public static boolean isSupplementaryCodePoint(int codePoint) {
 2204           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
 2205               && codePoint <= MAX_CODE_POINT;
 2206       }
 2207   
 2208       /**
 2209        * Determines if the given <code>char</code> value is a
 2210        * high-surrogate code unit (also known as <i>leading-surrogate
 2211        * code unit</i>). Such values do not represent characters by
 2212        * themselves, but are used in the representation of <a
 2213        * href="#supplementary">supplementary characters</a> in the
 2214        * UTF-16 encoding.
 2215        *
 2216        * <p>This method returns <code>true</code> if and only if
 2217        * <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF'
 2218        * </pre></blockquote>
 2219        * is <code>true</code>.
 2220        *
 2221        * @param   ch   the <code>char</code> value to be tested.
 2222        * @return  <code>true</code> if the <code>char</code> value
 2223        *          is between '&#92;uD800' and '&#92;uDBFF' inclusive;
 2224        *          <code>false</code> otherwise.
 2225        * @see     java.lang.Character#isLowSurrogate(char)
 2226        * @see     Character.UnicodeBlock#of(int)
 2227        * @since   1.5
 2228        */
 2229       public static boolean isHighSurrogate(char ch) {
 2230           return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
 2231       }
 2232   
 2233       /**
 2234        * Determines if the given <code>char</code> value is a
 2235        * low-surrogate code unit (also known as <i>trailing-surrogate code
 2236        * unit</i>). Such values do not represent characters by themselves,
 2237        * but are used in the representation of <a
 2238        * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
 2239        *
 2240        * <p> This method returns <code>true</code> if and only if
 2241        * <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF'
 2242        * </pre></blockquote> is <code>true</code>.
 2243        *
 2244        * @param   ch   the <code>char</code> value to be tested.
 2245        * @return  <code>true</code> if the <code>char</code> value
 2246        *          is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
 2247        *          <code>false</code> otherwise.
 2248        * @see java.lang.Character#isHighSurrogate(char)
 2249        * @since   1.5
 2250        */
 2251       public static boolean isLowSurrogate(char ch) {
 2252           return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
 2253       }
 2254   
 2255       /**
 2256        * Determines whether the specified pair of <code>char</code>
 2257        * values is a valid surrogate pair. This method is equivalent to
 2258        * the expression:
 2259        * <blockquote><pre>
 2260        * isHighSurrogate(high) && isLowSurrogate(low)
 2261        * </pre></blockquote>
 2262        *
 2263        * @param  high the high-surrogate code value to be tested
 2264        * @param  low the low-surrogate code value to be tested
 2265        * @return <code>true</code> if the specified high and
 2266        * low-surrogate code values represent a valid surrogate pair;
 2267        * <code>false</code> otherwise.
 2268        * @since  1.5
 2269        */
 2270       public static boolean isSurrogatePair(char high, char low) {
 2271           return isHighSurrogate(high) && isLowSurrogate(low);
 2272       }
 2273   
 2274       /**
 2275        * Determines the number of <code>char</code> values needed to
 2276        * represent the specified character (Unicode code point). If the
 2277        * specified character is equal to or greater than 0x10000, then
 2278        * the method returns 2. Otherwise, the method returns 1.
 2279        *
 2280        * <p>This method doesn't validate the specified character to be a
 2281        * valid Unicode code point. The caller must validate the
 2282        * character value using {@link #isValidCodePoint(int) isValidCodePoint}
 2283        * if necessary.
 2284        *
 2285        * @param   codePoint the character (Unicode code point) to be tested.
 2286        * @return  2 if the character is equal to or greater than 0x10000; 1 otherwise.
 2287        * @see     #isSupplementaryCodePoint(int)
 2288        * @since   1.5
 2289        */
 2290       public static int charCount(int codePoint) {
 2291           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
 2292       }
 2293   
 2294       /**
 2295        * Converts the specified surrogate pair to its supplementary code
 2296        * point value. This method does not validate the specified
 2297        * surrogate pair. The caller must validate it using {@link
 2298        * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
 2299        *
 2300        * @param  high the high-surrogate code unit
 2301        * @param  low the low-surrogate code unit
 2302        * @return the supplementary code point composed from the
 2303        *         specified surrogate pair.
 2304        * @since  1.5
 2305        */
 2306       public static int toCodePoint(char high, char low) {
 2307           return ((high - MIN_HIGH_SURROGATE) << 10)
 2308               + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
 2309       }
 2310   
 2311       /**
 2312        * Returns the code point at the given index of the
 2313        * <code>CharSequence</code>. If the <code>char</code> value at
 2314        * the given index in the <code>CharSequence</code> is in the
 2315        * high-surrogate range, the following index is less than the
 2316        * length of the <code>CharSequence</code>, and the
 2317        * <code>char</code> value at the following index is in the
 2318        * low-surrogate range, then the supplementary code point
 2319        * corresponding to this surrogate pair is returned. Otherwise,
 2320        * the <code>char</code> value at the given index is returned.
 2321        *
 2322        * @param seq a sequence of <code>char</code> values (Unicode code
 2323        * units)
 2324        * @param index the index to the <code>char</code> values (Unicode
 2325        * code units) in <code>seq</code> to be converted
 2326        * @return the Unicode code point at the given index
 2327        * @exception NullPointerException if <code>seq</code> is null.
 2328        * @exception IndexOutOfBoundsException if the value
 2329        * <code>index</code> is negative or not less than
 2330        * {@link CharSequence#length() seq.length()}.
 2331        * @since  1.5
 2332        */
 2333       public static int codePointAt(CharSequence seq, int index) {
 2334           char c1 = seq.charAt(index++);
 2335           if (isHighSurrogate(c1)) {
 2336               if (index < seq.length()) {
 2337                   char c2 = seq.charAt(index);
 2338                   if (isLowSurrogate(c2)) {
 2339                       return toCodePoint(c1, c2);
 2340                   }
 2341               }
 2342           }
 2343           return c1;
 2344       }
 2345   
 2346       /**
 2347        * Returns the code point at the given index of the
 2348        * <code>char</code> array. If the <code>char</code> value at
 2349        * the given index in the <code>char</code> array is in the
 2350        * high-surrogate range, the following index is less than the
 2351        * length of the <code>char</code> array, and the
 2352        * <code>char</code> value at the following index is in the
 2353        * low-surrogate range, then the supplementary code point
 2354        * corresponding to this surrogate pair is returned. Otherwise,
 2355        * the <code>char</code> value at the given index is returned.
 2356        *
 2357        * @param a the <code>char</code> array
 2358        * @param index the index to the <code>char</code> values (Unicode
 2359        * code units) in the <code>char</code> array to be converted
 2360        * @return the Unicode code point at the given index
 2361        * @exception NullPointerException if <code>a</code> is null.
 2362        * @exception IndexOutOfBoundsException if the value
 2363        * <code>index</code> is negative or not less than
 2364        * the length of the <code>char</code> array.
 2365        * @since  1.5
 2366        */
 2367       public static int codePointAt(char[] a, int index) {
 2368           return codePointAtImpl(a, index, a.length);
 2369       }
 2370   
 2371       /**
 2372        * Returns the code point at the given index of the
 2373        * <code>char</code> array, where only array elements with
 2374        * <code>index</code> less than <code>limit</code> can be used. If
 2375        * the <code>char</code> value at the given index in the
 2376        * <code>char</code> array is in the high-surrogate range, the
 2377        * following index is less than the <code>limit</code>, and the
 2378        * <code>char</code> value at the following index is in the
 2379        * low-surrogate range, then the supplementary code point
 2380        * corresponding to this surrogate pair is returned. Otherwise,
 2381        * the <code>char</code> value at the given index is returned.
 2382        *
 2383        * @param a the <code>char</code> array
 2384        * @param index the index to the <code>char</code> values (Unicode
 2385        * code units) in the <code>char</code> array to be converted
 2386        * @param limit the index after the last array element that can be used in the
 2387        * <code>char</code> array
 2388        * @return the Unicode code point at the given index
 2389        * @exception NullPointerException if <code>a</code> is null.
 2390        * @exception IndexOutOfBoundsException if the <code>index</code>
 2391        * argument is negative or not less than the <code>limit</code>
 2392        * argument, or if the <code>limit</code> argument is negative or
 2393        * greater than the length of the <code>char</code> array.
 2394        * @since  1.5
 2395        */
 2396       public static int codePointAt(char[] a, int index, int limit) {
 2397           if (index >= limit || limit < 0 || limit > a.length) {
 2398               throw new IndexOutOfBoundsException();
 2399           }
 2400           return codePointAtImpl(a, index, limit);
 2401       }
 2402   
 2403       static int codePointAtImpl(char[] a, int index, int limit) {
 2404           char c1 = a[index++];
 2405           if (isHighSurrogate(c1)) {
 2406               if (index < limit) {
 2407                   char c2 = a[index];
 2408                   if (isLowSurrogate(c2)) {
 2409                       return toCodePoint(c1, c2);
 2410                   }
 2411               }
 2412           }
 2413           return c1;
 2414       }
 2415   
 2416       /**
 2417        * Returns the code point preceding the given index of the
 2418        * <code>CharSequence</code>. If the <code>char</code> value at
 2419        * <code>(index - 1)</code> in the <code>CharSequence</code> is in
 2420        * the low-surrogate range, <code>(index - 2)</code> is not
 2421        * negative, and the <code>char</code> value at <code>(index -
 2422        * 2)</code> in the <code>CharSequence</code> is in the
 2423        * high-surrogate range, then the supplementary code point
 2424        * corresponding to this surrogate pair is returned. Otherwise,
 2425        * the <code>char</code> value at <code>(index - 1)</code> is
 2426        * returned.
 2427        *
 2428        * @param seq the <code>CharSequence</code> instance
 2429        * @param index the index following the code point that should be returned
 2430        * @return the Unicode code point value before the given index.
 2431        * @exception NullPointerException if <code>seq</code> is null.
 2432        * @exception IndexOutOfBoundsException if the <code>index</code>
 2433        * argument is less than 1 or greater than {@link
 2434        * CharSequence#length() seq.length()}.
 2435        * @since  1.5
 2436        */
 2437       public static int codePointBefore(CharSequence seq, int index) {
 2438           char c2 = seq.charAt(--index);
 2439           if (isLowSurrogate(c2)) {
 2440               if (index > 0) {
 2441                   char c1 = seq.charAt(--index);
 2442                   if (isHighSurrogate(c1)) {
 2443                       return toCodePoint(c1, c2);
 2444                   }
 2445               }
 2446           }
 2447           return c2;
 2448       }
 2449   
 2450       /**
 2451        * Returns the code point preceding the given index of the
 2452        * <code>char</code> array. If the <code>char</code> value at
 2453        * <code>(index - 1)</code> in the <code>char</code> array is in
 2454        * the low-surrogate range, <code>(index - 2)</code> is not
 2455        * negative, and the <code>char</code> value at <code>(index -
 2456        * 2)</code> in the <code>char</code> array is in the
 2457        * high-surrogate range, then the supplementary code point
 2458        * corresponding to this surrogate pair is returned. Otherwise,
 2459        * the <code>char</code> value at <code>(index - 1)</code> is
 2460        * returned.
 2461        *
 2462        * @param a the <code>char</code> array
 2463        * @param index the index following the code point that should be returned
 2464        * @return the Unicode code point value before the given index.
 2465        * @exception NullPointerException if <code>a</code> is null.
 2466        * @exception IndexOutOfBoundsException if the <code>index</code>
 2467        * argument is less than 1 or greater than the length of the
 2468        * <code>char</code> array
 2469        * @since  1.5
 2470        */
 2471       public static int codePointBefore(char[] a, int index) {
 2472           return codePointBeforeImpl(a, index, 0);
 2473       }
 2474   
 2475       /**
 2476        * Returns the code point preceding the given index of the
 2477        * <code>char</code> array, where only array elements with
 2478        * <code>index</code> greater than or equal to <code>start</code>
 2479        * can be used. If the <code>char</code> value at <code>(index -
 2480        * 1)</code> in the <code>char</code> array is in the
 2481        * low-surrogate range, <code>(index - 2)</code> is not less than
 2482        * <code>start</code>, and the <code>char</code> value at
 2483        * <code>(index - 2)</code> in the <code>char</code> array is in
 2484        * the high-surrogate range, then the supplementary code point
 2485        * corresponding to this surrogate pair is returned. Otherwise,
 2486        * the <code>char</code> value at <code>(index - 1)</code> is
 2487        * returned.
 2488        *
 2489        * @param a the <code>char</code> array
 2490        * @param index the index following the code point that should be returned
 2491        * @param start the index of the first array element in the
 2492        * <code>char</code> array
 2493        * @return the Unicode code point value before the given index.
 2494        * @exception NullPointerException if <code>a</code> is null.
 2495        * @exception IndexOutOfBoundsException if the <code>index</code>
 2496        * argument is not greater than the <code>start</code> argument or
 2497        * is greater than the length of the <code>char</code> array, or
 2498        * if the <code>start</code> argument is negative or not less than
 2499        * the length of the <code>char</code> array.
 2500        * @since  1.5
 2501        */
 2502       public static int codePointBefore(char[] a, int index, int start) {
 2503           if (index <= start || start < 0 || start >= a.length) {
 2504               throw new IndexOutOfBoundsException();
 2505           }
 2506           return codePointBeforeImpl(a, index, start);
 2507       }
 2508   
 2509       static int codePointBeforeImpl(char[] a, int index, int start) {
 2510           char c2 = a[--index];
 2511           if (isLowSurrogate(c2)) {
 2512               if (index > start) {
 2513                   char c1 = a[--index];
 2514                   if (isHighSurrogate(c1)) {
 2515                       return toCodePoint(c1, c2);
 2516                   }
 2517               }
 2518           }
 2519           return c2;
 2520       }
 2521   
 2522       /**
 2523        * Converts the specified character (Unicode code point) to its
 2524        * UTF-16 representation. If the specified code point is a BMP
 2525        * (Basic Multilingual Plane or Plane 0) value, the same value is
 2526        * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
 2527        * specified code point is a supplementary character, its
 2528        * surrogate values are stored in <code>dst[dstIndex]</code>
 2529        * (high-surrogate) and <code>dst[dstIndex+1]</code>
 2530        * (low-surrogate), and 2 is returned.
 2531        *
 2532        * @param  codePoint the character (Unicode code point) to be converted.
 2533        * @param  dst an array of <code>char</code> in which the
 2534        * <code>codePoint</code>'s UTF-16 value is stored.
 2535        * @param dstIndex the start index into the <code>dst</code>
 2536        * array where the converted value is stored.
 2537        * @return 1 if the code point is a BMP code point, 2 if the
 2538        * code point is a supplementary code point.
 2539        * @exception IllegalArgumentException if the specified
 2540        * <code>codePoint</code> is not a valid Unicode code point.
 2541        * @exception NullPointerException if the specified <code>dst</code> is null.
 2542        * @exception IndexOutOfBoundsException if <code>dstIndex</code>
 2543        * is negative or not less than <code>dst.length</code>, or if
 2544        * <code>dst</code> at <code>dstIndex</code> doesn't have enough
 2545        * array element(s) to store the resulting <code>char</code>
 2546        * value(s). (If <code>dstIndex</code> is equal to
 2547        * <code>dst.length-1</code> and the specified
 2548        * <code>codePoint</code> is a supplementary character, the
 2549        * high-surrogate value is not stored in
 2550        * <code>dst[dstIndex]</code>.)
 2551        * @since  1.5
 2552        */
 2553       public static int toChars(int codePoint, char[] dst, int dstIndex) {
 2554           if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
 2555               throw new IllegalArgumentException();
 2556           }
 2557           if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
 2558               dst[dstIndex] = (char) codePoint;
 2559               return 1;
 2560           }
 2561           toSurrogates(codePoint, dst, dstIndex);
 2562           return 2;
 2563       }
 2564   
 2565       /**
 2566        * Converts the specified character (Unicode code point) to its
 2567        * UTF-16 representation stored in a <code>char</code> array. If
 2568        * the specified code point is a BMP (Basic Multilingual Plane or
 2569        * Plane 0) value, the resulting <code>char</code> array has
 2570        * the same value as <code>codePoint</code>. If the specified code
 2571        * point is a supplementary code point, the resulting
 2572        * <code>char</code> array has the corresponding surrogate pair.
 2573        *
 2574        * @param  codePoint a Unicode code point
 2575        * @return a <code>char</code> array having
 2576        *         <code>codePoint</code>'s UTF-16 representation.
 2577        * @exception IllegalArgumentException if the specified
 2578        * <code>codePoint</code> is not a valid Unicode code point.
 2579        * @since  1.5
 2580        */
 2581       public static char[] toChars(int codePoint) {
 2582           if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
 2583               throw new IllegalArgumentException();
 2584           }
 2585           if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
 2586                   return new char[] { (char) codePoint };
 2587           }
 2588           char[] result = new char[2];
 2589           toSurrogates(codePoint, result, 0);
 2590           return result;
 2591       }
 2592   
 2593       static void toSurrogates(int codePoint, char[] dst, int index) {
 2594           int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
 2595           dst[index+1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE);
 2596           dst[index] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE);
 2597       }
 2598   
 2599       /**
 2600        * Returns the number of Unicode code points in the text range of
 2601        * the specified char sequence. The text range begins at the
 2602        * specified <code>beginIndex</code> and extends to the
 2603        * <code>char</code> at index <code>endIndex - 1</code>. Thus the
 2604        * length (in <code>char</code>s) of the text range is
 2605        * <code>endIndex-beginIndex</code>. Unpaired surrogates within
 2606        * the text range count as one code point each.
 2607        *
 2608        * @param seq the char sequence
 2609        * @param beginIndex the index to the first <code>char</code> of
 2610        * the text range.
 2611        * @param endIndex the index after the last <code>char</code> of
 2612        * the text range.
 2613        * @return the number of Unicode code points in the specified text
 2614        * range
 2615        * @exception NullPointerException if <code>seq</code> is null.
 2616        * @exception IndexOutOfBoundsException if the
 2617        * <code>beginIndex</code> is negative, or <code>endIndex</code>
 2618        * is larger than the length of the given sequence, or
 2619        * <code>beginIndex</code> is larger than <code>endIndex</code>.
 2620        * @since  1.5
 2621        */
 2622       public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
 2623           int length = seq.length();
 2624           if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
 2625               throw new IndexOutOfBoundsException();
 2626           }
 2627           int n = 0;
 2628           for (int i = beginIndex; i < endIndex; ) {
 2629               n++;
 2630               if (isHighSurrogate(seq.charAt(i++))) {
 2631                   if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
 2632                       i++;
 2633                   }
 2634               }
 2635           }
 2636           return n;
 2637       }
 2638   
 2639       /**
 2640        * Returns the number of Unicode code points in a subarray of the
 2641        * <code>char</code> array argument. The <code>offset</code>
 2642        * argument is the index of the first <code>char</code> of the
 2643        * subarray and the <code>count</code> argument specifies the
 2644        * length of the subarray in <code>char</code>s. Unpaired
 2645        * surrogates within the subarray count as one code point each.
 2646        *
 2647        * @param a the <code>char</code> array
 2648        * @param offset the index of the first <code>char</code> in the
 2649        * given <code>char</code> array
 2650        * @param count the length of the subarray in <code>char</code>s
 2651        * @return the number of Unicode code points in the specified subarray
 2652        * @exception NullPointerException if <code>a</code> is null.
 2653        * @exception IndexOutOfBoundsException if <code>offset</code> or
 2654        * <code>count</code> is negative, or if <code>offset +
 2655        * count</code> is larger than the length of the given array.
 2656        * @since  1.5
 2657        */
 2658       public static int codePointCount(char[] a, int offset, int count) {
 2659           if (count > a.length - offset || offset < 0 || count < 0) {
 2660               throw new IndexOutOfBoundsException();
 2661           }
 2662           return codePointCountImpl(a, offset, count);
 2663       }
 2664   
 2665       static int codePointCountImpl(char[] a, int offset, int count) {
 2666           int endIndex = offset + count;
 2667           int n = 0;
 2668           for (int i = offset; i < endIndex; ) {
 2669               n++;
 2670               if (isHighSurrogate(a[i++])) {
 2671                   if (i < endIndex && isLowSurrogate(a[i])) {
 2672                       i++;
 2673                   }
 2674               }
 2675           }
 2676           return n;
 2677       }
 2678   
 2679       /**
 2680        * Returns the index within the given char sequence that is offset
 2681        * from the given <code>index</code> by <code>codePointOffset</code>
 2682        * code points. Unpaired surrogates within the text range given by
 2683        * <code>index</code> and <code>codePointOffset</code> count as
 2684        * one code point each.
 2685        *
 2686        * @param seq the char sequence
 2687        * @param index the index to be offset
 2688        * @param codePointOffset the offset in code points
 2689        * @return the index within the char sequence
 2690        * @exception NullPointerException if <code>seq</code> is null.
 2691        * @exception IndexOutOfBoundsException if <code>index</code>
 2692        *   is negative or larger than the length of the char sequence,
 2693        *   or if <code>codePointOffset</code> is positive and the
 2694        *   subsequence starting with <code>index</code> has fewer than
 2695        *   <code>codePointOffset</code> code points, or if
 2696        *   <code>codePointOffset</code> is negative and the subsequence
 2697        *   before <code>index</code> has fewer than the absolute value
 2698        *   of <code>codePointOffset</code> code points.
 2699        * @since 1.5
 2700        */
 2701       public static int offsetByCodePoints(CharSequence seq, int index,
 2702                                            int codePointOffset) {
 2703           int length = seq.length();
 2704           if (index < 0 || index > length) {
 2705               throw new IndexOutOfBoundsException();
 2706           }
 2707   
 2708           int x = index;
 2709           if (codePointOffset >= 0) {
 2710               int i;
 2711               for (i = 0; x < length && i < codePointOffset; i++) {
 2712                   if (isHighSurrogate(seq.charAt(x++))) {
 2713                       if (x < length && isLowSurrogate(seq.charAt(x))) {
 2714                           x++;
 2715                       }
 2716                   }
 2717               }
 2718               if (i < codePointOffset) {
 2719                   throw new IndexOutOfBoundsException();
 2720               }
 2721           } else {
 2722               int i;
 2723               for (i = codePointOffset; x > 0 && i < 0; i++) {
 2724                   if (isLowSurrogate(seq.charAt(--x))) {
 2725                       if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
 2726                           x--;
 2727                       }
 2728                   }
 2729               }
 2730               if (i < 0) {
 2731                   throw new IndexOutOfBoundsException();
 2732               }
 2733           }
 2734           return x;
 2735       }
 2736   
 2737       /**
 2738        * Returns the index within the given <code>char</code> subarray
 2739        * that is offset from the given <code>index</code> by
 2740        * <code>codePointOffset</code> code points. The
 2741        * <code>start</code> and <code>count</code> arguments specify a
 2742        * subarray of the <code>char</code> array. Unpaired surrogates
 2743        * within the text range given by <code>index</code> and
 2744        * <code>codePointOffset</code> count as one code point each.
 2745        *
 2746        * @param a the <code>char</code> array
 2747        * @param start the index of the first <code>char</code> of the
 2748        * subarray
 2749        * @param count the length of the subarray in <code>char</code>s
 2750        * @param index the index to be offset
 2751        * @param codePointOffset the offset in code points
 2752        * @return the index within the subarray
 2753        * @exception NullPointerException if <code>a</code> is null.
 2754        * @exception IndexOutOfBoundsException
 2755        *   if <code>start</code> or <code>count</code> is negative,
 2756        *   or if <code>start + count</code> is larger than the length of
 2757        *   the given array,
 2758        *   or if <code>index</code> is less than <code>start</code> or
 2759        *   larger than <code>start + count</code>,
 2760        *   or if <code>codePointOffset</code> is positive and the text range
 2761        *   starting with <code>index</code> and ending with <code>start
 2762        *   + count - 1</code> has fewer than <code>codePointOffset</code> code
 2763        *   points,
 2764        *   or if <code>codePointOffset</code> is negative and the text range
 2765        *   starting with <code>start</code> and ending with <code>index
 2766        *   - 1</code> has fewer than the absolute value of
 2767        *   <code>codePointOffset</code> code points.
 2768        * @since 1.5
 2769        */
 2770       public static int offsetByCodePoints(char[] a, int start, int count,
 2771                                            int index, int codePointOffset) {
 2772           if (count > a.length-start || start < 0 || count < 0
 2773               || index < start || index > start+count) {
 2774               throw new IndexOutOfBoundsException();
 2775           }
 2776           return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
 2777       }
 2778   
 2779       static int offsetByCodePointsImpl(char[]a, int start, int count,
 2780                                         int index, int codePointOffset) {
 2781           int x = index;
 2782           if (codePointOffset >= 0) {
 2783               int limit = start + count;
 2784               int i;
 2785               for (i = 0; x < limit && i < codePointOffset; i++) {
 2786                   if (isHighSurrogate(a[x++])) {
 2787                       if (x < limit && isLowSurrogate(a[x])) {
 2788                           x++;
 2789                       }
 2790                   }
 2791               }
 2792               if (i < codePointOffset) {
 2793                   throw new IndexOutOfBoundsException();
 2794               }
 2795           } else {
 2796               int i;
 2797               for (i = codePointOffset; x > start && i < 0; i++) {
 2798                   if (isLowSurrogate(a[--x])) {
 2799                       if (x > start && isHighSurrogate(a[x-1])) {
 2800                           x--;
 2801                       }
 2802                   }
 2803               }
 2804               if (i < 0) {
 2805                   throw new IndexOutOfBoundsException();
 2806               }
 2807           }
 2808           return x;
 2809       }
 2810   
 2811      /**
 2812        * Determines if the specified character is a lowercase character.
 2813        * <p>
 2814        * A character is lowercase if its general category type, provided
 2815        * by <code>Character.getType(ch)</code>, is
 2816        * <code>LOWERCASE_LETTER</code>.
 2817        * <p>
 2818        * The following are examples of lowercase characters:
 2819        * <p><blockquote><pre>
 2820        * a b c d e f g h i j k l m n o p q r s t u v w x y z
 2821        * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
 2822        * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
 2823        * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
 2824        * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
 2825        * </pre></blockquote>
 2826        * <p> Many other Unicode characters are lowercase too.
 2827        *
 2828        * <p><b>Note:</b> This method cannot handle <a
 2829        * href="#supplementary"> supplementary characters</a>. To support
 2830        * all Unicode characters, including supplementary characters, use
 2831        * the {@link #isLowerCase(int)} method.
 2832        *
 2833        * @param   ch   the character to be tested.
 2834        * @return  <code>true</code> if the character is lowercase;
 2835        *          <code>false</code> otherwise.
 2836        * @see     java.lang.Character#isUpperCase(char)
 2837        * @see     java.lang.Character#isTitleCase(char)
 2838        * @see     java.lang.Character#toLowerCase(char)
 2839        * @see     java.lang.Character#getType(char)
 2840        */
 2841       public static boolean isLowerCase(char ch) {
 2842           return isLowerCase((int)ch);
 2843       }
 2844   
 2845       /**
 2846        * Determines if the specified character (Unicode code point) is a
 2847        * lowercase character.
 2848        * <p>
 2849        * A character is lowercase if its general category type, provided
 2850        * by {@link Character#getType getType(codePoint)}, is
 2851        * <code>LOWERCASE_LETTER</code>.
 2852        * <p>
 2853        * The following are examples of lowercase characters:
 2854        * <p><blockquote><pre>
 2855        * a b c d e f g h i j k l m n o p q r s t u v w x y z
 2856        * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
 2857        * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
 2858        * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
 2859        * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
 2860        * </pre></blockquote>
 2861        * <p> Many other Unicode characters are lowercase too.
 2862        *
 2863        * @param   codePoint the character (Unicode code point) to be tested.
 2864        * @return  <code>true</code> if the character is lowercase;
 2865        *          <code>false</code> otherwise.
 2866        * @see     java.lang.Character#isUpperCase(int)
 2867        * @see     java.lang.Character#isTitleCase(int)
 2868        * @see     java.lang.Character#toLowerCase(int)
 2869        * @see     java.lang.Character#getType(int)
 2870        * @since   1.5
 2871        */
 2872       public static boolean isLowerCase(int codePoint) {
 2873           return getType(codePoint) == Character.LOWERCASE_LETTER;
 2874       }
 2875   
 2876      /**
 2877        * Determines if the specified character is an uppercase character.
 2878        * <p>
 2879        * A character is uppercase if its general category type, provided by
 2880        * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
 2881        * <p>
 2882        * The following are examples of uppercase characters:
 2883        * <p><blockquote><pre>
 2884        * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
 2885        * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
 2886        * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
 2887        * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
 2888        * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
 2889        * </pre></blockquote>
 2890        * <p> Many other Unicode characters are uppercase too.
 2891        *
 2892        * <p><b>Note:</b> This method cannot handle <a
 2893        * href="#supplementary"> supplementary characters</a>. To support
 2894        * all Unicode characters, including supplementary characters, use
 2895        * the {@link #isUpperCase(int)} method.
 2896        *
 2897        * @param   ch   the character to be tested.
 2898        * @return  <code>true</code> if the character is uppercase;
 2899        *          <code>false</code> otherwise.
 2900        * @see     java.lang.Character#isLowerCase(char)
 2901        * @see     java.lang.Character#isTitleCase(char)
 2902        * @see     java.lang.Character#toUpperCase(char)
 2903        * @see     java.lang.Character#getType(char)
 2904        * @since   1.0
 2905        */
 2906       public static boolean isUpperCase(char ch) {
 2907           return isUpperCase((int)ch);
 2908       }
 2909   
 2910       /**
 2911        * Determines if the specified character (Unicode code point) is an uppercase character.
 2912        * <p>
 2913        * A character is uppercase if its general category type, provided by
 2914        * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
 2915        * <p>
 2916        * The following are examples of uppercase characters:
 2917        * <p><blockquote><pre>
 2918        * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
 2919        * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
 2920        * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
 2921        * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
 2922        * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
 2923        * </pre></blockquote>
 2924        * <p> Many other Unicode characters are uppercase too.
 2925        *
 2926        * @param   codePoint the character (Unicode code point) to be tested.
 2927        * @return  <code>true</code> if the character is uppercase;
 2928        *          <code>false</code> otherwise.
 2929        * @see     java.lang.Character#isLowerCase(int)
 2930        * @see     java.lang.Character#isTitleCase(int)
 2931        * @see     java.lang.Character#toUpperCase(int)
 2932        * @see     java.lang.Character#getType(int)
 2933        * @since   1.5
 2934        */
 2935       public static boolean isUpperCase(int codePoint) {
 2936           return getType(codePoint) == Character.UPPERCASE_LETTER;
 2937       }
 2938   
 2939       /**
 2940        * Determines if the specified character is a titlecase character.
 2941        * <p>
 2942        * A character is a titlecase character if its general
 2943        * category type, provided by <code>Character.getType(ch)</code>,
 2944        * is <code>TITLECASE_LETTER</code>.
 2945        * <p>
 2946        * Some characters look like pairs of Latin letters. For example, there
 2947        * is an uppercase letter that looks like "LJ" and has a corresponding
 2948        * lowercase letter that looks like "lj". A third form, which looks like "Lj",
 2949        * is the appropriate form to use when rendering a word in lowercase
 2950        * with initial capitals, as for a book title.
 2951        * <p>
 2952        * These are some of the Unicode characters for which this method returns
 2953        * <code>true</code>:
 2954        * <ul>
 2955        * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
 2956        * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
 2957        * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
 2958        * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
 2959        * </ul>
 2960        * <p> Many other Unicode characters are titlecase too.
 2961        *
 2962        * <p><b>Note:</b> This method cannot handle <a
 2963        * href="#supplementary"> supplementary characters</a>. To support
 2964        * all Unicode characters, including supplementary characters, use
 2965        * the {@link #isTitleCase(int)} method.
 2966        *
 2967        * @param   ch   the character to be tested.
 2968        * @return  <code>true</code> if the character is titlecase;
 2969        *          <code>false</code> otherwise.
 2970        * @see     java.lang.Character#isLowerCase(char)
 2971        * @see     java.lang.Character#isUpperCase(char)
 2972        * @see     java.lang.Character#toTitleCase(char)
 2973        * @see     java.lang.Character#getType(char)
 2974        * @since   1.0.2
 2975        */
 2976       public static boolean isTitleCase(char ch) {
 2977           return isTitleCase((int)ch);
 2978       }
 2979   
 2980       /**
 2981        * Determines if the specified character (Unicode code point) is a titlecase character.
 2982        * <p>
 2983        * A character is a titlecase character if its general
 2984        * category type, provided by {@link Character#getType(int) getType(codePoint)},
 2985        * is <code>TITLECASE_LETTER</code>.
 2986        * <p>
 2987        * Some characters look like pairs of Latin letters. For example, there
 2988        * is an uppercase letter that looks like "LJ" and has a corresponding
 2989        * lowercase letter that looks like "lj". A third form, which looks like "Lj",
 2990        * is the appropriate form to use when rendering a word in lowercase
 2991        * with initial capitals, as for a book title.
 2992        * <p>
 2993        * These are some of the Unicode characters for which this method returns
 2994        * <code>true</code>:
 2995        * <ul>
 2996        * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
 2997        * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
 2998        * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
 2999        * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
 3000        * </ul>
 3001        * <p> Many other Unicode characters are titlecase too.
 3002        *
 3003        * @param   codePoint the character (Unicode code point) to be tested.
 3004        * @return  <code>true</code> if the character is titlecase;
 3005        *          <code>false</code> otherwise.
 3006        * @see     java.lang.Character#isLowerCase(int)
 3007        * @see     java.lang.Character#isUpperCase(int)
 3008        * @see     java.lang.Character#toTitleCase(int)
 3009        * @see     java.lang.Character#getType(int)
 3010        * @since   1.5
 3011        */
 3012       public static boolean isTitleCase(int codePoint) {
 3013           return getType(codePoint) == Character.TITLECASE_LETTER;
 3014       }
 3015   
 3016       /**
 3017        * Determines if the specified character is a digit.
 3018        * <p>
 3019        * A character is a digit if its general category type, provided
 3020        * by <code>Character.getType(ch)</code>, is
 3021        * <code>DECIMAL_DIGIT_NUMBER</code>.
 3022        * <p>
 3023        * Some Unicode character ranges that contain digits:
 3024        * <ul>
 3025        * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
 3026        *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
 3027        * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
 3028        *     Arabic-Indic digits
 3029        * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
 3030        *     Extended Arabic-Indic digits
 3031        * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
 3032        *     Devanagari digits
 3033        * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
 3034        *     Fullwidth digits
 3035        * </ul>
 3036        *
 3037        * Many other character ranges contain digits as well.
 3038        *
 3039        * <p><b>Note:</b> This method cannot handle <a
 3040        * href="#supplementary"> supplementary characters</a>. To support
 3041        * all Unicode characters, including supplementary characters, use
 3042        * the {@link #isDigit(int)} method.
 3043        *
 3044        * @param   ch   the character to be tested.
 3045        * @return  <code>true</code> if the character is a digit;
 3046        *          <code>false</code> otherwise.
 3047        * @see     java.lang.Character#digit(char, int)
 3048        * @see     java.lang.Character#forDigit(int, int)
 3049        * @see     java.lang.Character#getType(char)
 3050        */
 3051       public static boolean isDigit(char ch) {
 3052           return isDigit((int)ch);
 3053       }
 3054   
 3055       /**
 3056        * Determines if the specified character (Unicode code point) is a digit.
 3057        * <p>
 3058        * A character is a digit if its general category type, provided
 3059        * by {@link Character#getType(int) getType(codePoint)}, is
 3060        * <code>DECIMAL_DIGIT_NUMBER</code>.
 3061        * <p>
 3062        * Some Unicode character ranges that contain digits:
 3063        * <ul>
 3064        * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
 3065        *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
 3066        * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
 3067        *     Arabic-Indic digits
 3068        * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
 3069        *     Extended Arabic-Indic digits
 3070        * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
 3071        *     Devanagari digits
 3072        * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
 3073        *     Fullwidth digits
 3074        * </ul>
 3075        *
 3076        * Many other character ranges contain digits as well.
 3077        *
 3078        * @param   codePoint the character (Unicode code point) to be tested.
 3079        * @return  <code>true</code> if the character is a digit;
 3080        *          <code>false</code> otherwise.
 3081        * @see     java.lang.Character#forDigit(int, int)
 3082        * @see     java.lang.Character#getType(int)
 3083        * @since   1.5
 3084        */
 3085       public static boolean isDigit(int codePoint) {
 3086           return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
 3087       }
 3088   
 3089       /**
 3090        * Determines if a character is defined in Unicode.
 3091        * <p>
 3092        * A character is defined if at least one of the following is true:
 3093        * <ul>
 3094        * <li>It has an entry in the UnicodeData file.
 3095        * <li>It has a value in a range defined by the UnicodeData file.
 3096        * </ul>
 3097        *
 3098        * <p><b>Note:</b> This method cannot handle <a
 3099        * href="#supplementary"> supplementary characters</a>. To support
 3100        * all Unicode characters, including supplementary characters, use
 3101        * the {@link #isDefined(int)} method.
 3102        *
 3103        * @param   ch   the character to be tested
 3104        * @return  <code>true</code> if the character has a defined meaning
 3105        *          in Unicode; <code>false</code> otherwise.
 3106        * @see     java.lang.Character#isDigit(char)
 3107        * @see     java.lang.Character#isLetter(char)
 3108        * @see     java.lang.Character#isLetterOrDigit(char)
 3109        * @see     java.lang.Character#isLowerCase(char)
 3110        * @see     java.lang.Character#isTitleCase(char)
 3111        * @see     java.lang.Character#isUpperCase(char)
 3112        * @since   1.0.2
 3113        */
 3114       public static boolean isDefined(char ch) {
 3115           return isDefined((int)ch);
 3116       }
 3117   
 3118       /**
 3119        * Determines if a character (Unicode code point) is defined in Unicode.
 3120        * <p>
 3121        * A character is defined if at least one of the following is true:
 3122        * <ul>
 3123        * <li>It has an entry in the UnicodeData file.
 3124        * <li>It has a value in a range defined by the UnicodeData file.
 3125        * </ul>
 3126        *
 3127        * @param   codePoint the character (Unicode code point) to be tested.
 3128        * @return  <code>true</code> if the character has a defined meaning
 3129        *          in Unicode; <code>false</code> otherwise.
 3130        * @see     java.lang.Character#isDigit(int)
 3131        * @see     java.lang.Character#isLetter(int)
 3132        * @see     java.lang.Character#isLetterOrDigit(int)
 3133        * @see     java.lang.Character#isLowerCase(int)
 3134        * @see     java.lang.Character#isTitleCase(int)
 3135        * @see     java.lang.Character#isUpperCase(int)
 3136        * @since   1.5
 3137        */
 3138       public static boolean isDefined(int codePoint) {
 3139           return getType(codePoint) != Character.UNASSIGNED;
 3140       }
 3141   
 3142       /**
 3143        * Determines if the specified character is a letter.
 3144        * <p>
 3145        * A character is considered to be a letter if its general
 3146        * category type, provided by <code>Character.getType(ch)</code>,
 3147        * is any of the following:
 3148        * <ul>
 3149        * <li> <code>UPPERCASE_LETTER</code>
 3150        * <li> <code>LOWERCASE_LETTER</code>
 3151        * <li> <code>TITLECASE_LETTER</code>
 3152        * <li> <code>MODIFIER_LETTER</code>
 3153        * <li> <code>OTHER_LETTER</code>
 3154        * </ul>
 3155        *
 3156        * Not all letters have case. Many characters are
 3157        * letters but are neither uppercase nor lowercase nor titlecase.
 3158        *
 3159        * <p><b>Note:</b> This method cannot handle <a
 3160        * href="#supplementary"> supplementary characters</a>. To support
 3161        * all Unicode characters, including supplementary characters, use
 3162        * the {@link #isLetter(int)} method.
 3163        *
 3164        * @param   ch   the character to be tested.
 3165        * @return  <code>true</code> if the character is a letter;
 3166        *          <code>false</code> otherwise.
 3167        * @see     java.lang.Character#isDigit(char)
 3168        * @see     java.lang.Character#isJavaIdentifierStart(char)
 3169        * @see     java.lang.Character#isJavaLetter(char)
 3170        * @see     java.lang.Character#isJavaLetterOrDigit(char)
 3171        * @see     java.lang.Character#isLetterOrDigit(char)
 3172        * @see     java.lang.Character#isLowerCase(char)
 3173        * @see     java.lang.Character#isTitleCase(char)
 3174        * @see     java.lang.Character#isUnicodeIdentifierStart(char)
 3175        * @see     java.lang.Character#isUpperCase(char)
 3176        */
 3177       public static boolean isLetter(char ch) {
 3178           return isLetter((int)ch);
 3179       }
 3180   
 3181       /**
 3182        * Determines if the specified character (Unicode code point) is a letter.
 3183        * <p>
 3184        * A character is considered to be a letter if its general
 3185        * category type, provided by {@link Character#getType(int) getType(codePoint)},
 3186        * is any of the following:
 3187        * <ul>
 3188        * <li> <code>UPPERCASE_LETTER</code>
 3189        * <li> <code>LOWERCASE_LETTER</code>
 3190        * <li> <code>TITLECASE_LETTER</code>
 3191        * <li> <code>MODIFIER_LETTER</code>
 3192        * <li> <code>OTHER_LETTER</code>
 3193        * </ul>
 3194        *
 3195        * Not all letters have case. Many characters are
 3196        * letters but are neither uppercase nor lowercase nor titlecase.
 3197        *
 3198        * @param   codePoint the character (Unicode code point) to be tested.
 3199        * @return  <code>true</code> if the character is a letter;
 3200        *          <code>false</code> otherwise.
 3201        * @see     java.lang.Character#isDigit(int)
 3202        * @see     java.lang.Character#isJavaIdentifierStart(int)
 3203        * @see     java.lang.Character#isLetterOrDigit(int)
 3204        * @see     java.lang.Character#isLowerCase(int)
 3205        * @see     java.lang.Character#isTitleCase(int)
 3206        * @see     java.lang.Character#isUnicodeIdentifierStart(int)
 3207        * @see     java.lang.Character#isUpperCase(int)
 3208        * @since   1.5
 3209        */
 3210       public static boolean isLetter(int codePoint) {
 3211           return ((((1 << Character.UPPERCASE_LETTER) |
 3212               (1 << Character.LOWERCASE_LETTER) |
 3213               (1 << Character.TITLECASE_LETTER) |
 3214               (1 << Character.MODIFIER_LETTER) |
 3215               (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
 3216               != 0;
 3217       }
 3218   
 3219       /**
 3220        * Determines if the specified character is a letter or digit.
 3221        * <p>
 3222        * A character is considered to be a letter or digit if either
 3223        * <code>Character.isLetter(char ch)</code> or
 3224        * <code>Character.isDigit(char ch)</code> returns
 3225        * <code>true</code> for the character.
 3226        *
 3227        * <p><b>Note:</b> This method cannot handle <a
 3228        * href="#supplementary"> supplementary characters</a>. To support
 3229        * all Unicode characters, including supplementary characters, use
 3230        * the {@link #isLetterOrDigit(int)} method.
 3231        *
 3232        * @param   ch   the character to be tested.
 3233        * @return  <code>true</code> if the character is a letter or digit;
 3234        *          <code>false</code> otherwise.
 3235        * @see     java.lang.Character#isDigit(char)
 3236        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3237        * @see     java.lang.Character#isJavaLetter(char)
 3238        * @see     java.lang.Character#isJavaLetterOrDigit(char)
 3239        * @see     java.lang.Character#isLetter(char)
 3240        * @see     java.lang.Character#isUnicodeIdentifierPart(char)
 3241        * @since   1.0.2
 3242        */
 3243       public static boolean isLetterOrDigit(char ch) {
 3244           return isLetterOrDigit((int)ch);
 3245       }
 3246   
 3247       /**
 3248        * Determines if the specified character (Unicode code point) is a letter or digit.
 3249        * <p>
 3250        * A character is considered to be a letter or digit if either
 3251        * {@link #isLetter(int) isLetter(codePoint)} or
 3252        * {@link #isDigit(int) isDigit(codePoint)} returns
 3253        * <code>true</code> for the character.
 3254        *
 3255        * @param   codePoint the character (Unicode code point) to be tested.
 3256        * @return  <code>true</code> if the character is a letter or digit;
 3257        *          <code>false</code> otherwise.
 3258        * @see     java.lang.Character#isDigit(int)
 3259        * @see     java.lang.Character#isJavaIdentifierPart(int)
 3260        * @see     java.lang.Character#isLetter(int)
 3261        * @see     java.lang.Character#isUnicodeIdentifierPart(int)
 3262        * @since   1.5
 3263        */
 3264       public static boolean isLetterOrDigit(int codePoint) {
 3265           return ((((1 << Character.UPPERCASE_LETTER) |
 3266               (1 << Character.LOWERCASE_LETTER) |
 3267               (1 << Character.TITLECASE_LETTER) |
 3268               (1 << Character.MODIFIER_LETTER) |
 3269               (1 << Character.OTHER_LETTER) |
 3270               (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
 3271               != 0;
 3272       }
 3273   
 3274       /**
 3275        * Determines if the specified character is permissible as the first
 3276        * character in a Java identifier.
 3277        * <p>
 3278        * A character may start a Java identifier if and only if
 3279        * one of the following is true:
 3280        * <ul>
 3281        * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
 3282        * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
 3283        * <li> ch is a currency symbol (such as "$")
 3284        * <li> ch is a connecting punctuation character (such as "_").
 3285        * </ul>
 3286        *
 3287        * @param   ch the character to be tested.
 3288        * @return  <code>true</code> if the character may start a Java
 3289        *          identifier; <code>false</code> otherwise.
 3290        * @see     java.lang.Character#isJavaLetterOrDigit(char)
 3291        * @see     java.lang.Character#isJavaIdentifierStart(char)
 3292        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3293        * @see     java.lang.Character#isLetter(char)
 3294        * @see     java.lang.Character#isLetterOrDigit(char)
 3295        * @see     java.lang.Character#isUnicodeIdentifierStart(char)
 3296        * @since   1.0.2
 3297        * @deprecated Replaced by isJavaIdentifierStart(char).
 3298        */
 3299       @Deprecated
 3300       public static boolean isJavaLetter(char ch) {
 3301           return isJavaIdentifierStart(ch);
 3302       }
 3303   
 3304       /**
 3305        * Determines if the specified character may be part of a Java
 3306        * identifier as other than the first character.
 3307        * <p>
 3308        * A character may be part of a Java identifier if and only if any
 3309        * of the following are true:
 3310        * <ul>
 3311        * <li>  it is a letter
 3312        * <li>  it is a currency symbol (such as <code>'$'</code>)
 3313        * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
 3314        * <li>  it is a digit
 3315        * <li>  it is a numeric letter (such as a Roman numeral character)
 3316        * <li>  it is a combining mark
 3317        * <li>  it is a non-spacing mark
 3318        * <li> <code>isIdentifierIgnorable</code> returns
 3319        * <code>true</code> for the character.
 3320        * </ul>
 3321        *
 3322        * @param   ch the character to be tested.
 3323        * @return  <code>true</code> if the character may be part of a
 3324        *          Java identifier; <code>false</code> otherwise.
 3325        * @see     java.lang.Character#isJavaLetter(char)
 3326        * @see     java.lang.Character#isJavaIdentifierStart(char)
 3327        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3328        * @see     java.lang.Character#isLetter(char)
 3329        * @see     java.lang.Character#isLetterOrDigit(char)
 3330        * @see     java.lang.Character#isUnicodeIdentifierPart(char)
 3331        * @see     java.lang.Character#isIdentifierIgnorable(char)
 3332        * @since   1.0.2
 3333        * @deprecated Replaced by isJavaIdentifierPart(char).
 3334        */
 3335       @Deprecated
 3336       public static boolean isJavaLetterOrDigit(char ch) {
 3337           return isJavaIdentifierPart(ch);
 3338       }
 3339   
 3340       /**
 3341        * Determines if the specified character is
 3342        * permissible as the first character in a Java identifier.
 3343        * <p>
 3344        * A character may start a Java identifier if and only if
 3345        * one of the following conditions is true:
 3346        * <ul>
 3347        * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
 3348        * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
 3349        * <li> ch is a currency symbol (such as "$")
 3350        * <li> ch is a connecting punctuation character (such as "_").
 3351        * </ul>
 3352        *
 3353        * <p><b>Note:</b> This method cannot handle <a
 3354        * href="#supplementary"> supplementary characters</a>. To support
 3355        * all Unicode characters, including supplementary characters, use
 3356        * the {@link #isJavaIdentifierStart(int)} method.
 3357        *
 3358        * @param   ch the character to be tested.
 3359        * @return  <code>true</code> if the character may start a Java identifier;
 3360        *          <code>false</code> otherwise.
 3361        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3362        * @see     java.lang.Character#isLetter(char)
 3363        * @see     java.lang.Character#isUnicodeIdentifierStart(char)
 3364        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 3365        * @since   1.1
 3366        */
 3367       public static boolean isJavaIdentifierStart(char ch) {
 3368           return isJavaIdentifierStart((int)ch);
 3369       }
 3370   
 3371       /**
 3372        * Determines if the character (Unicode code point) is
 3373        * permissible as the first character in a Java identifier.
 3374        * <p>
 3375        * A character may start a Java identifier if and only if
 3376        * one of the following conditions is true:
 3377        * <ul>
 3378        * <li> {@link #isLetter(int) isLetter(codePoint)}
 3379        *      returns <code>true</code>
 3380        * <li> {@link #getType(int) getType(codePoint)}
 3381        *      returns <code>LETTER_NUMBER</code>
 3382        * <li> the referenced character is a currency symbol (such as "$")
 3383        * <li> the referenced character is a connecting punctuation character
 3384        *      (such as "_").
 3385        * </ul>
 3386        *
 3387        * @param   codePoint the character (Unicode code point) to be tested.
 3388        * @return  <code>true</code> if the character may start a Java identifier;
 3389        *          <code>false</code> otherwise.
 3390        * @see     java.lang.Character#isJavaIdentifierPart(int)
 3391        * @see     java.lang.Character#isLetter(int)
 3392        * @see     java.lang.Character#isUnicodeIdentifierStart(int)
 3393        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 3394        * @since   1.5
 3395        */
 3396       public static boolean isJavaIdentifierStart(int codePoint) {
 3397           return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
 3398       }
 3399   
 3400       /**
 3401        * Determines if the specified character may be part of a Java
 3402        * identifier as other than the first character.
 3403        * <p>
 3404        * A character may be part of a Java identifier if any of the following
 3405        * are true:
 3406        * <ul>
 3407        * <li>  it is a letter
 3408        * <li>  it is a currency symbol (such as <code>'$'</code>)
 3409        * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
 3410        * <li>  it is a digit
 3411        * <li>  it is a numeric letter (such as a Roman numeral character)
 3412        * <li>  it is a combining mark
 3413        * <li>  it is a non-spacing mark
 3414        * <li> <code>isIdentifierIgnorable</code> returns
 3415        * <code>true</code> for the character
 3416        * </ul>
 3417        *
 3418        * <p><b>Note:</b> This method cannot handle <a
 3419        * href="#supplementary"> supplementary characters</a>. To support
 3420        * all Unicode characters, including supplementary characters, use
 3421        * the {@link #isJavaIdentifierPart(int)} method.
 3422        *
 3423        * @param   ch      the character to be tested.
 3424        * @return <code>true</code> if the character may be part of a
 3425        *          Java identifier; <code>false</code> otherwise.
 3426        * @see     java.lang.Character#isIdentifierIgnorable(char)
 3427        * @see     java.lang.Character#isJavaIdentifierStart(char)
 3428        * @see     java.lang.Character#isLetterOrDigit(char)
 3429        * @see     java.lang.Character#isUnicodeIdentifierPart(char)
 3430        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 3431        * @since   1.1
 3432        */
 3433       public static boolean isJavaIdentifierPart(char ch) {
 3434           return isJavaIdentifierPart((int)ch);
 3435       }
 3436   
 3437       /**
 3438        * Determines if the character (Unicode code point) may be part of a Java
 3439        * identifier as other than the first character.
 3440        * <p>
 3441        * A character may be part of a Java identifier if any of the following
 3442        * are true:
 3443        * <ul>
 3444        * <li>  it is a letter
 3445        * <li>  it is a currency symbol (such as <code>'$'</code>)
 3446        * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
 3447        * <li>  it is a digit
 3448        * <li>  it is a numeric letter (such as a Roman numeral character)
 3449        * <li>  it is a combining mark
 3450        * <li>  it is a non-spacing mark
 3451        * <li> {@link #isIdentifierIgnorable(int)
 3452        * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
 3453        * the character
 3454        * </ul>
 3455        *
 3456        * @param   codePoint the character (Unicode code point) to be tested.
 3457        * @return <code>true</code> if the character may be part of a
 3458        *          Java identifier; <code>false</code> otherwise.
 3459        * @see     java.lang.Character#isIdentifierIgnorable(int)
 3460        * @see     java.lang.Character#isJavaIdentifierStart(int)
 3461        * @see     java.lang.Character#isLetterOrDigit(int)
 3462        * @see     java.lang.Character#isUnicodeIdentifierPart(int)
 3463        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 3464        * @since   1.5
 3465        */
 3466       public static boolean isJavaIdentifierPart(int codePoint) {
 3467           return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
 3468       }
 3469   
 3470       /**
 3471        * Determines if the specified character is permissible as the
 3472        * first character in a Unicode identifier.
 3473        * <p>
 3474        * A character may start a Unicode identifier if and only if
 3475        * one of the following conditions is true:
 3476        * <ul>
 3477        * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
 3478        * <li> {@link #getType(char) getType(ch)} returns
 3479        *      <code>LETTER_NUMBER</code>.
 3480        * </ul>
 3481        *
 3482        * <p><b>Note:</b> This method cannot handle <a
 3483        * href="#supplementary"> supplementary characters</a>. To support
 3484        * all Unicode characters, including supplementary characters, use
 3485        * the {@link #isUnicodeIdentifierStart(int)} method.
 3486        *
 3487        * @param   ch      the character to be tested.
 3488        * @return  <code>true</code> if the character may start a Unicode
 3489        *          identifier; <code>false</code> otherwise.
 3490        * @see     java.lang.Character#isJavaIdentifierStart(char)
 3491        * @see     java.lang.Character#isLetter(char)
 3492        * @see     java.lang.Character#isUnicodeIdentifierPart(char)
 3493        * @since   1.1
 3494        */
 3495       public static boolean isUnicodeIdentifierStart(char ch) {
 3496           return isUnicodeIdentifierStart((int)ch);
 3497       }
 3498   
 3499       /**
 3500        * Determines if the specified character (Unicode code point) is permissible as the
 3501        * first character in a Unicode identifier.
 3502        * <p>
 3503        * A character may start a Unicode identifier if and only if
 3504        * one of the following conditions is true:
 3505        * <ul>
 3506        * <li> {@link #isLetter(int) isLetter(codePoint)}
 3507        *      returns <code>true</code>
 3508        * <li> {@link #getType(int) getType(codePoint)}
 3509        *      returns <code>LETTER_NUMBER</code>.
 3510        * </ul>
 3511        * @param   codePoint the character (Unicode code point) to be tested.
 3512        * @return  <code>true</code> if the character may start a Unicode
 3513        *          identifier; <code>false</code> otherwise.
 3514        * @see     java.lang.Character#isJavaIdentifierStart(int)
 3515        * @see     java.lang.Character#isLetter(int)
 3516        * @see     java.lang.Character#isUnicodeIdentifierPart(int)
 3517        * @since   1.5
 3518        */
 3519       public static boolean isUnicodeIdentifierStart(int codePoint) {
 3520           return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
 3521       }
 3522   
 3523       /**
 3524        * Determines if the specified character may be part of a Unicode
 3525        * identifier as other than the first character.
 3526        * <p>
 3527        * A character may be part of a Unicode identifier if and only if
 3528        * one of the following statements is true:
 3529        * <ul>
 3530        * <li>  it is a letter
 3531        * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
 3532        * <li>  it is a digit
 3533        * <li>  it is a numeric letter (such as a Roman numeral character)
 3534        * <li>  it is a combining mark
 3535        * <li>  it is a non-spacing mark
 3536        * <li> <code>isIdentifierIgnorable</code> returns
 3537        * <code>true</code> for this character.
 3538        * </ul>
 3539        *
 3540        * <p><b>Note:</b> This method cannot handle <a
 3541        * href="#supplementary"> supplementary characters</a>. To support
 3542        * all Unicode characters, including supplementary characters, use
 3543        * the {@link #isUnicodeIdentifierPart(int)} method.
 3544        *
 3545        * @param   ch      the character to be tested.
 3546        * @return  <code>true</code> if the character may be part of a
 3547        *          Unicode identifier; <code>false</code> otherwise.
 3548        * @see     java.lang.Character#isIdentifierIgnorable(char)
 3549        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3550        * @see     java.lang.Character#isLetterOrDigit(char)
 3551        * @see     java.lang.Character#isUnicodeIdentifierStart(char)
 3552        * @since   1.1
 3553        */
 3554       public static boolean isUnicodeIdentifierPart(char ch) {
 3555           return isUnicodeIdentifierPart((int)ch);
 3556       }
 3557   
 3558       /**
 3559        * Determines if the specified character (Unicode code point) may be part of a Unicode
 3560        * identifier as other than the first character.
 3561        * <p>
 3562        * A character may be part of a Unicode identifier if and only if
 3563        * one of the following statements is true:
 3564        * <ul>
 3565        * <li>  it is a letter
 3566        * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
 3567        * <li>  it is a digit
 3568        * <li>  it is a numeric letter (such as a Roman numeral character)
 3569        * <li>  it is a combining mark
 3570        * <li>  it is a non-spacing mark
 3571        * <li> <code>isIdentifierIgnorable</code> returns
 3572        * <code>true</code> for this character.
 3573        * </ul>
 3574        * @param   codePoint the character (Unicode code point) to be tested.
 3575        * @return  <code>true</code> if the character may be part of a
 3576        *          Unicode identifier; <code>false</code> otherwise.
 3577        * @see     java.lang.Character#isIdentifierIgnorable(int)
 3578        * @see     java.lang.Character#isJavaIdentifierPart(int)
 3579        * @see     java.lang.Character#isLetterOrDigit(int)
 3580        * @see     java.lang.Character#isUnicodeIdentifierStart(int)
 3581        * @since   1.5
 3582        */
 3583       public static boolean isUnicodeIdentifierPart(int codePoint) {
 3584           return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
 3585       }
 3586   
 3587       /**
 3588        * Determines if the specified character should be regarded as
 3589        * an ignorable character in a Java identifier or a Unicode identifier.
 3590        * <p>
 3591        * The following Unicode characters are ignorable in a Java identifier
 3592        * or a Unicode identifier:
 3593        * <ul>
 3594        * <li>ISO control characters that are not whitespace
 3595        * <ul>
 3596        * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
 3597        * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
 3598        * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
 3599        * </ul>
 3600        *
 3601        * <li>all characters that have the <code>FORMAT</code> general
 3602        * category value
 3603        * </ul>
 3604        *
 3605        * <p><b>Note:</b> This method cannot handle <a
 3606        * href="#supplementary"> supplementary characters</a>. To support
 3607        * all Unicode characters, including supplementary characters, use
 3608        * the {@link #isIdentifierIgnorable(int)} method.
 3609        *
 3610        * @param   ch      the character to be tested.
 3611        * @return  <code>true</code> if the character is an ignorable control
 3612        *          character that may be part of a Java or Unicode identifier;
 3613        *           <code>false</code> otherwise.
 3614        * @see     java.lang.Character#isJavaIdentifierPart(char)
 3615        * @see     java.lang.Character#isUnicodeIdentifierPart(char)
 3616        * @since   1.1
 3617        */
 3618       public static boolean isIdentifierIgnorable(char ch) {
 3619           return isIdentifierIgnorable((int)ch);
 3620       }
 3621   
 3622       /**
 3623        * Determines if the specified character (Unicode code point) should be regarded as
 3624        * an ignorable character in a Java identifier or a Unicode identifier.
 3625        * <p>
 3626        * The following Unicode characters are ignorable in a Java identifier
 3627        * or a Unicode identifier:
 3628        * <ul>
 3629        * <li>ISO control characters that are not whitespace
 3630        * <ul>
 3631        * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
 3632        * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
 3633        * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
 3634        * </ul>
 3635        *
 3636        * <li>all characters that have the <code>FORMAT</code> general
 3637        * category value
 3638        * </ul>
 3639        *
 3640        * @param   codePoint the character (Unicode code point) to be tested.
 3641        * @return  <code>true</code> if the character is an ignorable control
 3642        *          character that may be part of a Java or Unicode identifier;
 3643        *          <code>false</code> otherwise.
 3644        * @see     java.lang.Character#isJavaIdentifierPart(int)
 3645        * @see     java.lang.Character#isUnicodeIdentifierPart(int)
 3646        * @since   1.5
 3647        */
 3648       public static boolean isIdentifierIgnorable(int codePoint) {
 3649           return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
 3650       }
 3651   
 3652       /**
 3653        * Converts the character argument to lowercase using case
 3654        * mapping information from the UnicodeData file.
 3655        * <p>
 3656        * Note that
 3657        * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
 3658        * does not always return <code>true</code> for some ranges of
 3659        * characters, particularly those that are symbols or ideographs.
 3660        *
 3661        * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
 3662        * characters to lowercase. <code>String</code> case mapping methods
 3663        * have several benefits over <code>Character</code> case mapping methods.
 3664        * <code>String</code> case mapping methods can perform locale-sensitive
 3665        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 3666        * the <code>Character</code> case mapping methods cannot.
 3667        *
 3668        * <p><b>Note:</b> This method cannot handle <a
 3669        * href="#supplementary"> supplementary characters</a>. To support
 3670        * all Unicode characters, including supplementary characters, use
 3671        * the {@link #toLowerCase(int)} method.
 3672        *
 3673        * @param   ch   the character to be converted.
 3674        * @return  the lowercase equivalent of the character, if any;
 3675        *          otherwise, the character itself.
 3676        * @see     java.lang.Character#isLowerCase(char)
 3677        * @see     java.lang.String#toLowerCase()
 3678        */
 3679       public static char toLowerCase(char ch) {
 3680           return (char)toLowerCase((int)ch);
 3681       }
 3682   
 3683       /**
 3684        * Converts the character (Unicode code point) argument to
 3685        * lowercase using case mapping information from the UnicodeData
 3686        * file.
 3687        *
 3688        * <p> Note that
 3689        * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
 3690        * does not always return <code>true</code> for some ranges of
 3691        * characters, particularly those that are symbols or ideographs.
 3692        *
 3693        * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
 3694        * characters to lowercase. <code>String</code> case mapping methods
 3695        * have several benefits over <code>Character</code> case mapping methods.
 3696        * <code>String</code> case mapping methods can perform locale-sensitive
 3697        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 3698        * the <code>Character</code> case mapping methods cannot.
 3699        *
 3700        * @param   codePoint   the character (Unicode code point) to be converted.
 3701        * @return  the lowercase equivalent of the character (Unicode code
 3702        *          point), if any; otherwise, the character itself.
 3703        * @see     java.lang.Character#isLowerCase(int)
 3704        * @see     java.lang.String#toLowerCase()
 3705        *
 3706        * @since   1.5
 3707        */
 3708       public static int toLowerCase(int codePoint) {
 3709           return CharacterData.of(codePoint).toLowerCase(codePoint);
 3710       }
 3711   
 3712       /**
 3713        * Converts the character argument to uppercase using case mapping
 3714        * information from the UnicodeData file.
 3715        * <p>
 3716        * Note that
 3717        * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
 3718        * does not always return <code>true</code> for some ranges of
 3719        * characters, particularly those that are symbols or ideographs.
 3720        *
 3721        * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
 3722        * characters to uppercase. <code>String</code> case mapping methods
 3723        * have several benefits over <code>Character</code> case mapping methods.
 3724        * <code>String</code> case mapping methods can perform locale-sensitive
 3725        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 3726        * the <code>Character</code> case mapping methods cannot.
 3727        *
 3728        * <p><b>Note:</b> This method cannot handle <a
 3729        * href="#supplementary"> supplementary characters</a>. To support
 3730        * all Unicode characters, including supplementary characters, use
 3731        * the {@link #toUpperCase(int)} method.
 3732        *
 3733        * @param   ch   the character to be converted.
 3734        * @return  the uppercase equivalent of the character, if any;
 3735        *          otherwise, the character itself.
 3736        * @see     java.lang.Character#isUpperCase(char)
 3737        * @see     java.lang.String#toUpperCase()
 3738        */
 3739       public static char toUpperCase(char ch) {
 3740           return (char)toUpperCase((int)ch);
 3741       }
 3742   
 3743       /**
 3744        * Converts the character (Unicode code point) argument to
 3745        * uppercase using case mapping information from the UnicodeData
 3746        * file.
 3747        *
 3748        * <p>Note that
 3749        * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
 3750        * does not always return <code>true</code> for some ranges of
 3751        * characters, particularly those that are symbols or ideographs.
 3752        *
 3753        * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
 3754        * characters to uppercase. <code>String</code> case mapping methods
 3755        * have several benefits over <code>Character</code> case mapping methods.
 3756        * <code>String</code> case mapping methods can perform locale-sensitive
 3757        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 3758        * the <code>Character</code> case mapping methods cannot.
 3759        *
 3760        * @param   codePoint   the character (Unicode code point) to be converted.
 3761        * @return  the uppercase equivalent of the character, if any;
 3762        *          otherwise, the character itself.
 3763        * @see     java.lang.Character#isUpperCase(int)
 3764        * @see     java.lang.String#toUpperCase()
 3765        *
 3766        * @since   1.5
 3767        */
 3768       public static int toUpperCase(int codePoint) {
 3769           return CharacterData.of(codePoint).toUpperCase(codePoint);
 3770       }
 3771   
 3772       /**
 3773        * Converts the character argument to titlecase using case mapping
 3774        * information from the UnicodeData file. If a character has no
 3775        * explicit titlecase mapping and is not itself a titlecase char
 3776        * according to UnicodeData, then the uppercase mapping is
 3777        * returned as an equivalent titlecase mapping. If the
 3778        * <code>char</code> argument is already a titlecase
 3779        * <code>char</code>, the same <code>char</code> value will be
 3780        * returned.
 3781        * <p>
 3782        * Note that
 3783        * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
 3784        * does not always return <code>true</code> for some ranges of
 3785        * characters.
 3786        *
 3787        * <p><b>Note:</b> This method cannot handle <a
 3788        * href="#supplementary"> supplementary characters</a>. To support
 3789        * all Unicode characters, including supplementary characters, use
 3790        * the {@link #toTitleCase(int)} method.
 3791        *
 3792        * @param   ch   the character to be converted.
 3793        * @return  the titlecase equivalent of the character, if any;
 3794        *          otherwise, the character itself.
 3795        * @see     java.lang.Character#isTitleCase(char)
 3796        * @see     java.lang.Character#toLowerCase(char)
 3797        * @see     java.lang.Character#toUpperCase(char)
 3798        * @since   1.0.2
 3799        */
 3800       public static char toTitleCase(char ch) {
 3801           return (char)toTitleCase((int)ch);
 3802       }
 3803   
 3804       /**
 3805        * Converts the character (Unicode code point) argument to titlecase using case mapping
 3806        * information from the UnicodeData file. If a character has no
 3807        * explicit titlecase mapping and is not itself a titlecase char
 3808        * according to UnicodeData, then the uppercase mapping is
 3809        * returned as an equivalent titlecase mapping. If the
 3810        * character argument is already a titlecase
 3811        * character, the same character value will be
 3812        * returned.
 3813        *
 3814        * <p>Note that
 3815        * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
 3816        * does not always return <code>true</code> for some ranges of
 3817        * characters.
 3818        *
 3819        * @param   codePoint   the character (Unicode code point) to be converted.
 3820        * @return  the titlecase equivalent of the character, if any;
 3821        *          otherwise, the character itself.
 3822        * @see     java.lang.Character#isTitleCase(int)
 3823        * @see     java.lang.Character#toLowerCase(int)
 3824        * @see     java.lang.Character#toUpperCase(int)
 3825        * @since   1.5
 3826        */
 3827       public static int toTitleCase(int codePoint) {
 3828           return CharacterData.of(codePoint).toTitleCase(codePoint);
 3829       }
 3830   
 3831       /**
 3832        * Returns the numeric value of the character <code>ch</code> in the
 3833        * specified radix.
 3834        * <p>
 3835        * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
 3836        * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
 3837        * value of <code>ch</code> is not a valid digit in the specified
 3838        * radix, <code>-1</code> is returned. A character is a valid digit
 3839        * if at least one of the following is true:
 3840        * <ul>
 3841        * <li>The method <code>isDigit</code> is <code>true</code> of the character
 3842        *     and the Unicode decimal digit value of the character (or its
 3843        *     single-character decomposition) is less than the specified radix.
 3844        *     In this case the decimal digit value is returned.
 3845        * <li>The character is one of the uppercase Latin letters
 3846        *     <code>'A'</code> through <code>'Z'</code> and its code is less than
 3847        *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
 3848        *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
 3849        *     is returned.
 3850        * <li>The character is one of the lowercase Latin letters
 3851        *     <code>'a'</code> through <code>'z'</code> and its code is less than
 3852        *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
 3853        *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
 3854        *     is returned.
 3855        * </ul>
 3856        *
 3857        * <p><b>Note:</b> This method cannot handle <a
 3858        * href="#supplementary"> supplementary characters</a>. To support
 3859        * all Unicode characters, including supplementary characters, use
 3860        * the {@link #digit(int, int)} method.
 3861        *
 3862        * @param   ch      the character to be converted.
 3863        * @param   radix   the radix.
 3864        * @return  the numeric value represented by the character in the
 3865        *          specified radix.
 3866        * @see     java.lang.Character#forDigit(int, int)
 3867        * @see     java.lang.Character#isDigit(char)
 3868        */
 3869       public static int digit(char ch, int radix) {
 3870           return digit((int)ch, radix);
 3871       }
 3872   
 3873       /**
 3874        * Returns the numeric value of the specified character (Unicode
 3875        * code point) in the specified radix.
 3876        *
 3877        * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
 3878        * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
 3879        * character is not a valid digit in the specified
 3880        * radix, <code>-1</code> is returned. A character is a valid digit
 3881        * if at least one of the following is true:
 3882        * <ul>
 3883        * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
 3884        *     and the Unicode decimal digit value of the character (or its
 3885        *     single-character decomposition) is less than the specified radix.
 3886        *     In this case the decimal digit value is returned.
     * <li>The character is one of the uppercase Latin letters
     *     <code>'A'</code> through <code>'Z'</code> and its code is less than
     *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'A'&nbsp;+&nbsp;10</code>
     *     is returned.
     * <li>The character is one of the lowercase Latin letters
     *     <code>'a'</code> through <code>'z'</code> and its code is less than
     *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'a'&nbsp;+&nbsp;10</code>
     *     is returned.
 3897        * </ul>
 3898        *
 3899        * @param   codePoint the character (Unicode code point) to be converted.
 3900        * @param   radix   the radix.
 3901        * @return  the numeric value represented by the character in the
 3902        *          specified radix.
 3903        * @see     java.lang.Character#forDigit(int, int)
 3904        * @see     java.lang.Character#isDigit(int)
 3905        * @since   1.5
 3906        */
 3907       public static int digit(int codePoint, int radix) {
 3908           return CharacterData.of(codePoint).digit(codePoint, radix);
 3909       }
 3910   
 3911       /**
 3912        * Returns the <code>int</code> value that the specified Unicode
 3913        * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
     * an <code>int</code> with a value of 50.
 3916        * <p>
 3917        * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
 3918        * <code>'&#92;u005A'</code>), lowercase
 3919        * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
 3920        * full width variant (<code>'&#92;uFF21'</code> through
 3921        * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
 3922        * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
 3923        * through 35. This is independent of the Unicode specification,
 3924        * which does not assign numeric values to these <code>char</code>
 3925        * values.
 3926        * <p>
 3927        * If the character does not have a numeric value, then -1 is returned.
 3928        * If the character has a numeric value that cannot be represented as a
 3929        * nonnegative integer (for example, a fractional value), then -2
 3930        * is returned.
 3931        *
 3932        * <p><b>Note:</b> This method cannot handle <a
 3933        * href="#supplementary"> supplementary characters</a>. To support
 3934        * all Unicode characters, including supplementary characters, use
 3935        * the {@link #getNumericValue(int)} method.
 3936        *
 3937        * @param   ch      the character to be converted.
 3938        * @return  the numeric value of the character, as a nonnegative <code>int</code>
 3939        *           value; -2 if the character has a numeric value that is not a
 3940        *          nonnegative integer; -1 if the character has no numeric value.
 3941        * @see     java.lang.Character#forDigit(int, int)
 3942        * @see     java.lang.Character#isDigit(char)
 3943        * @since   1.1
 3944        */
 3945       public static int getNumericValue(char ch) {
 3946           return getNumericValue((int)ch);
 3947       }
 3948   
 3949       /**
 3950        * Returns the <code>int</code> value that the specified
 3951        * character (Unicode code point) represents. For example, the character
 3952        * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
 3953        * an <code>int</code> with a value of 50.
 3954        * <p>
 3955        * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
 3956        * <code>'&#92;u005A'</code>), lowercase
 3957        * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
 3958        * full width variant (<code>'&#92;uFF21'</code> through
 3959        * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
 3960        * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
 3961        * through 35. This is independent of the Unicode specification,
 3962        * which does not assign numeric values to these <code>char</code>
 3963        * values.
 3964        * <p>
 3965        * If the character does not have a numeric value, then -1 is returned.
 3966        * If the character has a numeric value that cannot be represented as a
 3967        * nonnegative integer (for example, a fractional value), then -2
 3968        * is returned.
 3969        *
 3970        * @param   codePoint the character (Unicode code point) to be converted.
 3971        * @return  the numeric value of the character, as a nonnegative <code>int</code>
 3972        *          value; -2 if the character has a numeric value that is not a
 3973        *          nonnegative integer; -1 if the character has no numeric value.
 3974        * @see     java.lang.Character#forDigit(int, int)
 3975        * @see     java.lang.Character#isDigit(int)
 3976        * @since   1.5
 3977        */
 3978       public static int getNumericValue(int codePoint) {
 3979           return CharacterData.of(codePoint).getNumericValue(codePoint);
 3980       }
 3981   
 3982       /**
 3983        * Determines if the specified character is ISO-LATIN-1 white space.
 3984        * This method returns <code>true</code> for the following five
 3985        * characters only:
 3986        * <table>
 3987        * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
 3988        *     <td><code>HORIZONTAL TABULATION</code></td></tr>
 3989        * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
 3990        *     <td><code>NEW LINE</code></td></tr>
 3991        * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
 3992        *     <td><code>FORM FEED</code></td></tr>
 3993        * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
 3994        *     <td><code>CARRIAGE RETURN</code></td></tr>
 3995        * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
 3996        *     <td><code>SPACE</code></td></tr>
 3997        * </table>
 3998        *
 3999        * @param      ch   the character to be tested.
 4000        * @return     <code>true</code> if the character is ISO-LATIN-1 white
 4001        *             space; <code>false</code> otherwise.
 4002        * @see        java.lang.Character#isSpaceChar(char)
 4003        * @see        java.lang.Character#isWhitespace(char)
 4004        * @deprecated Replaced by isWhitespace(char).
 4005        */
 4006       @Deprecated
 4007       public static boolean isSpace(char ch) {
 4008           return (ch <= 0x0020) &&
 4009               (((((1L << 0x0009) |
 4010               (1L << 0x000A) |
 4011               (1L << 0x000C) |
 4012               (1L << 0x000D) |
 4013               (1L << 0x0020)) >> ch) & 1L) != 0);
 4014       }
 4015   
 4016   
 4017       /**
 4018        * Determines if the specified character is a Unicode space character.
 4019        * A character is considered to be a space character if and only if
 4020        * it is specified to be a space character by the Unicode standard. This
 4021        * method returns true if the character's general category type is any of
 4022        * the following:
 4023        * <ul>
 4024        * <li> <code>SPACE_SEPARATOR</code>
 4025        * <li> <code>LINE_SEPARATOR</code>
 4026        * <li> <code>PARAGRAPH_SEPARATOR</code>
 4027        * </ul>
 4028        *
 4029        * <p><b>Note:</b> This method cannot handle <a
 4030        * href="#supplementary"> supplementary characters</a>. To support
 4031        * all Unicode characters, including supplementary characters, use
 4032        * the {@link #isSpaceChar(int)} method.
 4033        *
 4034        * @param   ch      the character to be tested.
 4035        * @return  <code>true</code> if the character is a space character;
 4036        *          <code>false</code> otherwise.
 4037        * @see     java.lang.Character#isWhitespace(char)
 4038        * @since   1.1
 4039        */
 4040       public static boolean isSpaceChar(char ch) {
 4041           return isSpaceChar((int)ch);
 4042       }
 4043   
 4044       /**
 4045        * Determines if the specified character (Unicode code point) is a
 4046        * Unicode space character.  A character is considered to be a
 4047        * space character if and only if it is specified to be a space
 4048        * character by the Unicode standard. This method returns true if
 4049        * the character's general category type is any of the following:
 4050        *
 4051        * <ul>
 4052        * <li> {@link #SPACE_SEPARATOR}
 4053        * <li> {@link #LINE_SEPARATOR}
 4054        * <li> {@link #PARAGRAPH_SEPARATOR}
 4055        * </ul>
 4056        *
 4057        * @param   codePoint the character (Unicode code point) to be tested.
 4058        * @return  <code>true</code> if the character is a space character;
 4059        *          <code>false</code> otherwise.
 4060        * @see     java.lang.Character#isWhitespace(int)
 4061        * @since   1.5
 4062        */
 4063       public static boolean isSpaceChar(int codePoint) {
 4064           return ((((1 << Character.SPACE_SEPARATOR) |
 4065                     (1 << Character.LINE_SEPARATOR) |
 4066                     (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
 4067               != 0;
 4068       }
 4069   
 4070       /**
 4071        * Determines if the specified character is white space according to Java.
 4072        * A character is a Java whitespace character if and only if it satisfies
 4073        * one of the following criteria:
 4074        * <ul>
 4075        * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
 4076        *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
 4077        *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
 4078        *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
 4079        * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
 4080        * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
 4081        * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
 4082        * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
 4083        * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
 4084        * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
 4085        * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
 4086        * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
 4087        * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
 4088        * </ul>
 4089        *
 4090        * <p><b>Note:</b> This method cannot handle <a
 4091        * href="#supplementary"> supplementary characters</a>. To support
 4092        * all Unicode characters, including supplementary characters, use
 4093        * the {@link #isWhitespace(int)} method.
 4094        *
 4095        * @param   ch the character to be tested.
 4096        * @return  <code>true</code> if the character is a Java whitespace
 4097        *          character; <code>false</code> otherwise.
 4098        * @see     java.lang.Character#isSpaceChar(char)
 4099        * @since   1.1
 4100        */
 4101       public static boolean isWhitespace(char ch) {
 4102           return isWhitespace((int)ch);
 4103       }
 4104   
 4105       /**
 4106        * Determines if the specified character (Unicode code point) is
 4107        * white space according to Java.  A character is a Java
 4108        * whitespace character if and only if it satisfies one of the
 4109        * following criteria:
 4110        * <ul>
 4111        * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
 4112        *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
 4113        *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
 4114        *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
 4115        * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
 4116        * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
 4117        * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
 4118        * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
 4119        * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
 4120        * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
 4121        * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
 4122        * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
 4123        * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
 4124        * </ul>
 4125        * <p>
 4126        *
 4127        * @param   codePoint the character (Unicode code point) to be tested.
 4128        * @return  <code>true</code> if the character is a Java whitespace
 4129        *          character; <code>false</code> otherwise.
 4130        * @see     java.lang.Character#isSpaceChar(int)
 4131        * @since   1.5
 4132        */
 4133       public static boolean isWhitespace(int codePoint) {
 4134           return CharacterData.of(codePoint).isWhitespace(codePoint);
 4135       }
 4136   
 4137       /**
 4138        * Determines if the specified character is an ISO control
 4139        * character.  A character is considered to be an ISO control
 4140        * character if its code is in the range <code>'&#92;u0000'</code>
 4141        * through <code>'&#92;u001F'</code> or in the range
 4142        * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
 4143        *
 4144        * <p><b>Note:</b> This method cannot handle <a
 4145        * href="#supplementary"> supplementary characters</a>. To support
 4146        * all Unicode characters, including supplementary characters, use
 4147        * the {@link #isISOControl(int)} method.
 4148        *
 4149        * @param   ch      the character to be tested.
 4150        * @return  <code>true</code> if the character is an ISO control character;
 4151        *          <code>false</code> otherwise.
 4152        *
 4153        * @see     java.lang.Character#isSpaceChar(char)
 4154        * @see     java.lang.Character#isWhitespace(char)
 4155        * @since   1.1
 4156        */
 4157       public static boolean isISOControl(char ch) {
 4158           return isISOControl((int)ch);
 4159       }
 4160   
 4161       /**
 4162        * Determines if the referenced character (Unicode code point) is an ISO control
 4163        * character.  A character is considered to be an ISO control
 4164        * character if its code is in the range <code>'&#92;u0000'</code>
 4165        * through <code>'&#92;u001F'</code> or in the range
 4166        * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
 4167        *
 4168        * @param   codePoint the character (Unicode code point) to be tested.
 4169        * @return  <code>true</code> if the character is an ISO control character;
 4170        *          <code>false</code> otherwise.
 4171        * @see     java.lang.Character#isSpaceChar(int)
 4172        * @see     java.lang.Character#isWhitespace(int)
 4173        * @since   1.5
 4174        */
 4175       public static boolean isISOControl(int codePoint) {
 4176           return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
 4177               (codePoint >= 0x007F && codePoint <= 0x009F);
 4178       }
 4179   
 4180       /**
 4181        * Returns a value indicating a character's general category.
 4182        *
 4183        * <p><b>Note:</b> This method cannot handle <a
 4184        * href="#supplementary"> supplementary characters</a>. To support
 4185        * all Unicode characters, including supplementary characters, use
 4186        * the {@link #getType(int)} method.
 4187        *
 4188        * @param   ch      the character to be tested.
 4189        * @return  a value of type <code>int</code> representing the
 4190        *          character's general category.
 4191        * @see     java.lang.Character#COMBINING_SPACING_MARK
 4192        * @see     java.lang.Character#CONNECTOR_PUNCTUATION
 4193        * @see     java.lang.Character#CONTROL
 4194        * @see     java.lang.Character#CURRENCY_SYMBOL
 4195        * @see     java.lang.Character#DASH_PUNCTUATION
 4196        * @see     java.lang.Character#DECIMAL_DIGIT_NUMBER
 4197        * @see     java.lang.Character#ENCLOSING_MARK
 4198        * @see     java.lang.Character#END_PUNCTUATION
 4199        * @see     java.lang.Character#FINAL_QUOTE_PUNCTUATION
 4200        * @see     java.lang.Character#FORMAT
 4201        * @see     java.lang.Character#INITIAL_QUOTE_PUNCTUATION
 4202        * @see     java.lang.Character#LETTER_NUMBER
 4203        * @see     java.lang.Character#LINE_SEPARATOR
 4204        * @see     java.lang.Character#LOWERCASE_LETTER
 4205        * @see     java.lang.Character#MATH_SYMBOL
 4206        * @see     java.lang.Character#MODIFIER_LETTER
 4207        * @see     java.lang.Character#MODIFIER_SYMBOL
 4208        * @see     java.lang.Character#NON_SPACING_MARK
 4209        * @see     java.lang.Character#OTHER_LETTER
 4210        * @see     java.lang.Character#OTHER_NUMBER
 4211        * @see     java.lang.Character#OTHER_PUNCTUATION
 4212        * @see     java.lang.Character#OTHER_SYMBOL
 4213        * @see     java.lang.Character#PARAGRAPH_SEPARATOR
 4214        * @see     java.lang.Character#PRIVATE_USE
 4215        * @see     java.lang.Character#SPACE_SEPARATOR
 4216        * @see     java.lang.Character#START_PUNCTUATION
 4217        * @see     java.lang.Character#SURROGATE
 4218        * @see     java.lang.Character#TITLECASE_LETTER
 4219        * @see     java.lang.Character#UNASSIGNED
 4220        * @see     java.lang.Character#UPPERCASE_LETTER
 4221        * @since   1.1
 4222        */
 4223       public static int getType(char ch) {
 4224           return getType((int)ch);
 4225       }
 4226   
 4227       /**
 4228        * Returns a value indicating a character's general category.
 4229        *
 4230        * @param   codePoint the character (Unicode code point) to be tested.
 4231        * @return  a value of type <code>int</code> representing the
 4232        *          character's general category.
 4233        * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
 4234        * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
 4235        * @see     Character#CONTROL CONTROL
 4236        * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
 4237        * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
 4238        * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
 4239        * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
 4240        * @see     Character#END_PUNCTUATION END_PUNCTUATION
 4241        * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
 4242        * @see     Character#FORMAT FORMAT
 4243        * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
 4244        * @see     Character#LETTER_NUMBER LETTER_NUMBER
 4245        * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
 4246        * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
 4247        * @see     Character#MATH_SYMBOL MATH_SYMBOL
 4248        * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
 4249        * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
 4250        * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
 4251        * @see     Character#OTHER_LETTER OTHER_LETTER
 4252        * @see     Character#OTHER_NUMBER OTHER_NUMBER
 4253        * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
 4254        * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
 4255        * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
 4256        * @see     Character#PRIVATE_USE PRIVATE_USE
 4257        * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
 4258        * @see     Character#START_PUNCTUATION START_PUNCTUATION
 4259        * @see     Character#SURROGATE SURROGATE
 4260        * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
 4261        * @see     Character#UNASSIGNED UNASSIGNED
 4262        * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
 4263        * @since   1.5
 4264        */
 4265       public static int getType(int codePoint) {
 4266           return CharacterData.of(codePoint).getType(codePoint);
 4267       }
 4268   
 4269       /**
 4270        * Determines the character representation for a specific digit in
 4271        * the specified radix. If the value of <code>radix</code> is not a
 4272        * valid radix, or the value of <code>digit</code> is not a valid
 4273        * digit in the specified radix, the null character
 4274        * (<code>'&#92;u0000'</code>) is returned.
 4275        * <p>
 4276        * The <code>radix</code> argument is valid if it is greater than or
 4277        * equal to <code>MIN_RADIX</code> and less than or equal to
 4278        * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
 4279        * <code>0&nbsp;&lt;=digit&nbsp;&lt;&nbsp;radix</code>.
 4280        * <p>
 4281        * If the digit is less than 10, then
 4282        * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
 4283        * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
 4284        *
 4285        * @param   digit   the number to convert to a character.
 4286        * @param   radix   the radix.
 4287        * @return  the <code>char</code> representation of the specified digit
 4288        *          in the specified radix.
 4289        * @see     java.lang.Character#MIN_RADIX
 4290        * @see     java.lang.Character#MAX_RADIX
 4291        * @see     java.lang.Character#digit(char, int)
 4292        */
 4293       public static char forDigit(int digit, int radix) {
 4294           if ((digit >= radix) || (digit < 0)) {
 4295               return '\0';
 4296           }
 4297           if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
 4298               return '\0';
 4299           }
 4300           if (digit < 10) {
 4301               return (char)('0' + digit);
 4302           }
 4303           return (char)('a' - 10 + digit);
 4304       }
 4305   
 4306       /**
 4307        * Returns the Unicode directionality property for the given
 4308        * character.  Character directionality is used to calculate the
 4309        * visual ordering of text. The directionality value of undefined
 4310        * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
 4311        *
 4312        * <p><b>Note:</b> This method cannot handle <a
 4313        * href="#supplementary"> supplementary characters</a>. To support
 4314        * all Unicode characters, including supplementary characters, use
 4315        * the {@link #getDirectionality(int)} method.
 4316        *
 4317        * @param  ch <code>char</code> for which the directionality property
 4318        *            is requested.
 4319        * @return the directionality property of the <code>char</code> value.
 4320        *
 4321        * @see Character#DIRECTIONALITY_UNDEFINED
 4322        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
 4323        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
 4324        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
 4325        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
 4326        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 4327        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 4328        * @see Character#DIRECTIONALITY_ARABIC_NUMBER
 4329        * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 4330        * @see Character#DIRECTIONALITY_NONSPACING_MARK
 4331        * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
 4332        * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
 4333        * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
 4334        * @see Character#DIRECTIONALITY_WHITESPACE
 4335        * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
 4336        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
 4337        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
 4338        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
 4339        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
 4340        * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
 4341        * @since 1.4
 4342        */
 4343       public static byte getDirectionality(char ch) {
 4344           return getDirectionality((int)ch);
 4345       }
 4346   
 4347       /**
 4348        * Returns the Unicode directionality property for the given
 4349        * character (Unicode code point).  Character directionality is
 4350        * used to calculate the visual ordering of text. The
 4351        * directionality value of undefined character is {@link
 4352        * #DIRECTIONALITY_UNDEFINED}.
 4353        *
 4354        * @param   codePoint the character (Unicode code point) for which
 4355        *          the directionality property is requested.
 4356        * @return the directionality property of the character.
 4357        *
 4358        * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
 4359        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
 4360        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
 4361        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
 4362        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
 4363        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 4364        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 4365        * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
 4366        * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 4367        * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
 4368        * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
 4369        * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
 4370        * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
 4371        * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
 4372        * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
 4373        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
 4374        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
 4375        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
 4376        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
 4377        * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
 4378        * @since    1.5
 4379        */
 4380       public static byte getDirectionality(int codePoint) {
 4381           return CharacterData.of(codePoint).getDirectionality(codePoint);
 4382       }
 4383   
 4384       /**
 4385        * Determines whether the character is mirrored according to the
 4386        * Unicode specification.  Mirrored characters should have their
 4387        * glyphs horizontally mirrored when displayed in text that is
 4388        * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
 4389        * PARENTHESIS is semantically defined to be an <i>opening
 4390        * parenthesis</i>.  This will appear as a "(" in text that is
 4391        * left-to-right but as a ")" in text that is right-to-left.
 4392        *
 4393        * <p><b>Note:</b> This method cannot handle <a
 4394        * href="#supplementary"> supplementary characters</a>. To support
 4395        * all Unicode characters, including supplementary characters, use
 4396        * the {@link #isMirrored(int)} method.
 4397        *
 4398        * @param  ch <code>char</code> for which the mirrored property is requested
 4399        * @return <code>true</code> if the char is mirrored, <code>false</code>
 4400        *         if the <code>char</code> is not mirrored or is not defined.
 4401        * @since 1.4
 4402        */
 4403       public static boolean isMirrored(char ch) {
 4404           return isMirrored((int)ch);
 4405       }
 4406   
 4407       /**
 4408        * Determines whether the specified character (Unicode code point)
 4409        * is mirrored according to the Unicode specification.  Mirrored
 4410        * characters should have their glyphs horizontally mirrored when
 4411        * displayed in text that is right-to-left.  For example,
 4412        * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
 4413        * defined to be an <i>opening parenthesis</i>.  This will appear
 4414        * as a "(" in text that is left-to-right but as a ")" in text
 4415        * that is right-to-left.
 4416        *
 4417        * @param   codePoint the character (Unicode code point) to be tested.
 4418        * @return  <code>true</code> if the character is mirrored, <code>false</code>
 4419        *          if the character is not mirrored or is not defined.
 4420        * @since   1.5
 4421        */
 4422       public static boolean isMirrored(int codePoint) {
 4423           return CharacterData.of(codePoint).isMirrored(codePoint);
 4424       }
 4425   
 4426       /**
 4427        * Compares two <code>Character</code> objects numerically.
 4428        *
 4429        * @param   anotherCharacter   the <code>Character</code> to be compared.
 4430        *
 4431        * @return  the value <code>0</code> if the argument <code>Character</code>
 4432        *          is equal to this <code>Character</code>; a value less than
 4433        *          <code>0</code> if this <code>Character</code> is numerically less
 4434        *          than the <code>Character</code> argument; and a value greater than
 4435        *          <code>0</code> if this <code>Character</code> is numerically greater
 4436        *          than the <code>Character</code> argument (unsigned comparison).
 4437        *          Note that this is strictly a numerical comparison; it is not
 4438        *          locale-dependent.
 4439        * @since   1.2
 4440        */
 4441       public int compareTo(Character anotherCharacter) {
 4442           return this.value - anotherCharacter.value;
 4443       }
 4444   
 4445       /**
 4446        * Converts the character (Unicode code point) argument to uppercase using
 4447        * information from the UnicodeData file.
 4448        * <p>
 4449        *
 4450        * @param   codePoint   the character (Unicode code point) to be converted.
 4451        * @return  either the uppercase equivalent of the character, if
 4452        *          any, or an error flag (<code>Character.ERROR</code>)
 4453        *          that indicates that a 1:M <code>char</code> mapping exists.
 4454        * @see     java.lang.Character#isLowerCase(char)
 4455        * @see     java.lang.Character#isUpperCase(char)
 4456        * @see     java.lang.Character#toLowerCase(char)
 4457        * @see     java.lang.Character#toTitleCase(char)
 4458        * @since 1.4
 4459        */
 4460       static int toUpperCaseEx(int codePoint) {
 4461           assert isValidCodePoint(codePoint);
 4462           return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
 4463       }
 4464   
 4465       /**
 4466        * Converts the character (Unicode code point) argument to uppercase using case
 4467        * mapping information from the SpecialCasing file in the Unicode
 4468        * specification. If a character has no explicit uppercase
 4469        * mapping, then the <code>char</code> itself is returned in the
 4470        * <code>char[]</code>.
 4471        *
 4472        * @param   codePoint   the character (Unicode code point) to be converted.
 4473        * @return a <code>char[]</code> with the uppercased character.
 4474        * @since 1.4
 4475        */
 4476       static char[] toUpperCaseCharArray(int codePoint) {
 4477           // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
 4478           assert isValidCodePoint(codePoint) &&
 4479                  !isSupplementaryCodePoint(codePoint);
 4480           return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
 4481       }
 4482   
 4483       /**
 4484        * The number of bits used to represent a <tt>char</tt> value in unsigned
 4485        * binary form.
 4486        *
 4487        * @since 1.5
 4488        */
    // Bit width of a char value.
    public static final int SIZE = 16;
 4490   
 4491       /**
 4492        * Returns the value obtained by reversing the order of the bytes in the
 4493        * specified <tt>char</tt> value.
 4494        *
 4495        * @return the value obtained by reversing (or, equivalently, swapping)
 4496        *     the bytes in the specified <tt>char</tt> value.
 4497        * @since 1.5
 4498        */
 4499       public static char reverseBytes(char ch) {
 4500           return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
 4501       }
 4502   }

Save This Page
Home » Open-JDK-6.b17-src » java » lang » [javadoc | source]