Home » openjdk-7 » java » lang » [javadoc | source]

    1   /*
    2    * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Oracle designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Oracle in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   22    * or visit www.oracle.com if you need additional information or have any
   23    * questions.
   24    */
   25   
   26   package java.lang;
   27   
   28   import java.util.Arrays;
   29   import java.util.Map;
   30   import java.util.HashMap;
   31   import java.util.Locale;
   32   
   33   /**
   34    * The {@code Character} class wraps a value of the primitive
   35    * type {@code char} in an object. An object of type
   36    * {@code Character} contains a single field whose type is
   37    * {@code char}.
   38    * <p>
   39    * In addition, this class provides several methods for determining
   40    * a character's category (lowercase letter, digit, etc.) and for converting
   41    * characters from uppercase to lowercase and vice versa.
   42    * <p>
   43    * Character information is based on the Unicode Standard, version 6.0.0.
   44    * <p>
   45    * The methods and data of class {@code Character} are defined by
   46    * the information in the <i>UnicodeData</i> file that is part of the
   47    * Unicode Character Database maintained by the Unicode
   48    * Consortium. This file specifies various properties including name
   49    * and general category for every defined Unicode code point or
   50    * character range.
   51    * <p>
   52    * The file and its description are available from the Unicode Consortium at:
   53    * <ul>
   54    * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
   55    * </ul>
   56    *
   57    * <h4><a name="unicode">Unicode Character Representations</a></h4>
   58    *
   59    * <p>The {@code char} data type (and therefore the value that a
   60    * {@code Character} object encapsulates) is based on the
   61    * original Unicode specification, which defined characters as
   62    * fixed-width 16-bit entities. The Unicode Standard has since been
   63    * changed to allow for characters whose representation requires more
   64    * than 16 bits.  The range of legal <em>code point</em>s is now
   65    * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
   66    * (Refer to the <a
   67    * href="http://www.unicode.org/reports/tr27/#notation"><i>
   68    * definition</i></a> of the U+<i>n</i> notation in the Unicode
   69    * Standard.)
   70    *
   71    * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
   72    * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
   73    * <a name="supplementary">Characters</a> whose code points are greater
   74    * than U+FFFF are called <em>supplementary character</em>s.  The Java
   75    * platform uses the UTF-16 representation in {@code char} arrays and
   76    * in the {@code String} and {@code StringBuffer} classes. In
   77    * this representation, supplementary characters are represented as a pair
   78    * of {@code char} values, the first from the <em>high-surrogates</em>
   79    * range, (&#92;uD800-&#92;uDBFF), the second from the
   80    * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
   81    *
   82    * <p>A {@code char} value, therefore, represents Basic
   83    * Multilingual Plane (BMP) code points, including the surrogate
   84    * code points, or code units of the UTF-16 encoding. An
   85    * {@code int} value represents all Unicode code points,
   86    * including supplementary code points. The lower (least significant)
   87    * 21 bits of {@code int} are used to represent Unicode code
   88    * points and the upper (most significant) 11 bits must be zero.
   89    * Unless otherwise specified, the behavior with respect to
   90    * supplementary characters and surrogate {@code char} values is
   91    * as follows:
   92    *
   93    * <ul>
   94    * <li>The methods that only accept a {@code char} value cannot support
   95    * supplementary characters. They treat {@code char} values from the
   96    * surrogate ranges as undefined characters. For example,
   97    * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
   98    * this specific value if followed by any low-surrogate value in a string
   99    * would represent a letter.
  100    *
  101    * <li>The methods that accept an {@code int} value support all
  102    * Unicode characters, including supplementary characters. For
  103    * example, {@code Character.isLetter(0x2F81A)} returns
  104    * {@code true} because the code point value represents a letter
  105    * (a CJK ideograph).
  106    * </ul>
  107    *
  108    * <p>In the Java SE API documentation, <em>Unicode code point</em> is
  109    * used for character values in the range between U+0000 and U+10FFFF,
  110    * and <em>Unicode code unit</em> is used for 16-bit
  111    * {@code char} values that are code units of the <em>UTF-16</em>
  112    * encoding. For more information on Unicode terminology, refer to the
  113    * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
  114    *
  115    * @author  Lee Boynton
  116    * @author  Guy Steele
  117    * @author  Akira Tanaka
  118    * @author  Martin Buchholz
  119    * @author  Ulf Zibis
  120    * @since   1.0
  121    */
  122   public final
  123   class Character implements java.io.Serializable, Comparable<Character> {
  124       /**
  125        * The minimum radix available for conversion to and from strings.
  126        * The constant value of this field is the smallest value permitted
  127        * for the radix argument in radix-conversion methods such as the
  128        * {@code digit} method, the {@code forDigit} method, and the
  129        * {@code toString} method of class {@code Integer}.
  130        *
  131        * @see     Character#digit(char, int)
  132        * @see     Character#forDigit(int, int)
  133        * @see     Integer#toString(int, int)
  134        * @see     Integer#valueOf(String)
  135        */
  136       public static final int MIN_RADIX = 2;
  137   
  138       /**
  139        * The maximum radix available for conversion to and from strings.
  140        * The constant value of this field is the largest value permitted
  141        * for the radix argument in radix-conversion methods such as the
  142        * {@code digit} method, the {@code forDigit} method, and the
  143        * {@code toString} method of class {@code Integer}.
  144        *
  145        * @see     Character#digit(char, int)
  146        * @see     Character#forDigit(int, int)
  147        * @see     Integer#toString(int, int)
  148        * @see     Integer#valueOf(String)
  149        */
  150       public static final int MAX_RADIX = 36;
  151   
  152       /**
  153        * The constant value of this field is the smallest value of type
  154        * {@code char}, {@code '\u005Cu0000'}.
  155        *
  156        * @since   1.0.2
  157        */
  158       public static final char MIN_VALUE = '\u0000';
  159   
  160       /**
  161        * The constant value of this field is the largest value of type
  162        * {@code char}, {@code '\u005CuFFFF'}.
  163        *
  164        * @since   1.0.2
  165        */
  166       public static final char MAX_VALUE = '\uFFFF';
  167   
  168       /**
  169        * The {@code Class} instance representing the primitive type
  170        * {@code char}.
  171        *
  172        * @since   1.1
  173        */
  174       @SuppressWarnings("unchecked")
  175       public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
  176   
  177       /*
  178        * Normative general types
  179        */
  180   
  181       /*
  182        * General character types
  183        */
  184   
  185       /**
  186        * General category "Cn" in the Unicode specification.
  187        * @since   1.1
  188        */
  189       public static final byte UNASSIGNED = 0;
  190   
  191       /**
  192        * General category "Lu" in the Unicode specification.
  193        * @since   1.1
  194        */
  195       public static final byte UPPERCASE_LETTER = 1;
  196   
  197       /**
  198        * General category "Ll" in the Unicode specification.
  199        * @since   1.1
  200        */
  201       public static final byte LOWERCASE_LETTER = 2;
  202   
  203       /**
  204        * General category "Lt" in the Unicode specification.
  205        * @since   1.1
  206        */
  207       public static final byte TITLECASE_LETTER = 3;
  208   
  209       /**
  210        * General category "Lm" in the Unicode specification.
  211        * @since   1.1
  212        */
  213       public static final byte MODIFIER_LETTER = 4;
  214   
  215       /**
  216        * General category "Lo" in the Unicode specification.
  217        * @since   1.1
  218        */
  219       public static final byte OTHER_LETTER = 5;
  220   
  221       /**
  222        * General category "Mn" in the Unicode specification.
  223        * @since   1.1
  224        */
  225       public static final byte NON_SPACING_MARK = 6;
  226   
  227       /**
  228        * General category "Me" in the Unicode specification.
  229        * @since   1.1
  230        */
  231       public static final byte ENCLOSING_MARK = 7;
  232   
  233       /**
  234        * General category "Mc" in the Unicode specification.
  235        * @since   1.1
  236        */
  237       public static final byte COMBINING_SPACING_MARK = 8;
  238   
  239       /**
  240        * General category "Nd" in the Unicode specification.
  241        * @since   1.1
  242        */
  243       public static final byte DECIMAL_DIGIT_NUMBER        = 9;
  244   
  245       /**
  246        * General category "Nl" in the Unicode specification.
  247        * @since   1.1
  248        */
  249       public static final byte LETTER_NUMBER = 10;
  250   
  251       /**
  252        * General category "No" in the Unicode specification.
  253        * @since   1.1
  254        */
  255       public static final byte OTHER_NUMBER = 11;
  256   
  257       /**
  258        * General category "Zs" in the Unicode specification.
  259        * @since   1.1
  260        */
  261       public static final byte SPACE_SEPARATOR = 12;
  262   
  263       /**
  264        * General category "Zl" in the Unicode specification.
  265        * @since   1.1
  266        */
  267       public static final byte LINE_SEPARATOR = 13;
  268   
  269       /**
  270        * General category "Zp" in the Unicode specification.
  271        * @since   1.1
  272        */
  273       public static final byte PARAGRAPH_SEPARATOR = 14;
  274   
  275       /**
  276        * General category "Cc" in the Unicode specification.
  277        * @since   1.1
  278        */
  279       public static final byte CONTROL = 15;
  280   
  281       /**
  282        * General category "Cf" in the Unicode specification.
  283        * @since   1.1
  284        */
  285       public static final byte FORMAT = 16;
  286   
  287       /**
  288        * General category "Co" in the Unicode specification.
  289        * @since   1.1
  290        */
  291       public static final byte PRIVATE_USE = 18; // NOTE(review): the numbering jumps from FORMAT (16) to 18; no category constant in this visible list is assigned 17
  292   
  293       /**
  294        * General category "Cs" in the Unicode specification.
  295        * @since   1.1
  296        */
  297       public static final byte SURROGATE = 19;
  298   
  299       /**
  300        * General category "Pd" in the Unicode specification.
  301        * @since   1.1
  302        */
  303       public static final byte DASH_PUNCTUATION = 20;
  304   
  305       /**
  306        * General category "Ps" in the Unicode specification.
  307        * @since   1.1
  308        */
  309       public static final byte START_PUNCTUATION = 21;
  310   
  311       /**
  312        * General category "Pe" in the Unicode specification.
  313        * @since   1.1
  314        */
  315       public static final byte END_PUNCTUATION = 22;
  316   
  317       /**
  318        * General category "Pc" in the Unicode specification.
  319        * @since   1.1
  320        */
  321       public static final byte CONNECTOR_PUNCTUATION = 23;
  322   
  323       /**
  324        * General category "Po" in the Unicode specification.
  325        * @since   1.1
  326        */
  327       public static final byte OTHER_PUNCTUATION = 24;
  328   
  329       /**
  330        * General category "Sm" in the Unicode specification.
  331        * @since   1.1
  332        */
  333       public static final byte MATH_SYMBOL = 25;
  334   
  335       /**
  336        * General category "Sc" in the Unicode specification.
  337        * @since   1.1
  338        */
  339       public static final byte CURRENCY_SYMBOL = 26;
  340   
  341       /**
  342        * General category "Sk" in the Unicode specification.
  343        * @since   1.1
  344        */
  345       public static final byte MODIFIER_SYMBOL = 27;
  346   
  347       /**
  348        * General category "So" in the Unicode specification.
  349        * @since   1.1
  350        */
  351       public static final byte OTHER_SYMBOL = 28;
  352   
  353       /**
  354        * General category "Pi" in the Unicode specification.
  355        * @since   1.4
  356        */
  357       public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
  358   
  359       /**
  360        * General category "Pf" in the Unicode specification.
  361        * @since   1.4
  362        */
  363       public static final byte FINAL_QUOTE_PUNCTUATION = 30;
  364   
  365       /**
  366        * Error flag. Use int (code point) to avoid confusion with U+FFFF.
  367        */
  368       static final int ERROR = 0xFFFFFFFF;
  369   
  370   
  371       /**
  372        * Undefined bidirectional character type. Undefined {@code char}
  373        * values have undefined directionality in the Unicode specification.
  374        * @since 1.4
  375        */
  376       public static final byte DIRECTIONALITY_UNDEFINED = -1;
  377   
  378       /**
  379        * Strong bidirectional character type "L" in the Unicode specification.
  380        * @since 1.4
  381        */
  382       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
  383   
  384       /**
  385        * Strong bidirectional character type "R" in the Unicode specification.
  386        * @since 1.4
  387        */
  388       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
  389   
  390       /**
  391        * Strong bidirectional character type "AL" in the Unicode specification.
  392        * @since 1.4
  393        */
  394       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
  395   
  396       /**
  397        * Weak bidirectional character type "EN" in the Unicode specification.
  398        * @since 1.4
  399        */
  400       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
  401   
  402       /**
  403        * Weak bidirectional character type "ES" in the Unicode specification.
  404        * @since 1.4
  405        */
  406       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
  407   
  408       /**
  409        * Weak bidirectional character type "ET" in the Unicode specification.
  410        * @since 1.4
  411        */
  412       public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
  413   
  414       /**
  415        * Weak bidirectional character type "AN" in the Unicode specification.
  416        * @since 1.4
  417        */
  418       public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
  419   
  420       /**
  421        * Weak bidirectional character type "CS" in the Unicode specification.
  422        * @since 1.4
  423        */
  424       public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
  425   
  426       /**
  427        * Weak bidirectional character type "NSM" in the Unicode specification.
  428        * @since 1.4
  429        */
  430       public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
  431   
  432       /**
  433        * Weak bidirectional character type "BN" in the Unicode specification.
  434        * @since 1.4
  435        */
  436       public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
  437   
  438       /**
  439        * Neutral bidirectional character type "B" in the Unicode specification.
  440        * @since 1.4
  441        */
  442       public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
  443   
  444       /**
  445        * Neutral bidirectional character type "S" in the Unicode specification.
  446        * @since 1.4
  447        */
  448       public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
  449   
  450       /**
  451        * Neutral bidirectional character type "WS" in the Unicode specification.
  452        * @since 1.4
  453        */
  454       public static final byte DIRECTIONALITY_WHITESPACE = 12;
  455   
  456       /**
  457        * Neutral bidirectional character type "ON" in the Unicode specification.
  458        * @since 1.4
  459        */
  460       public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
  461   
  462       /**
  463        * Strong bidirectional character type "LRE" in the Unicode specification.
  464        * @since 1.4
  465        */
  466       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
  467   
  468       /**
  469        * Strong bidirectional character type "LRO" in the Unicode specification.
  470        * @since 1.4
  471        */
  472       public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
  473   
  474       /**
  475        * Strong bidirectional character type "RLE" in the Unicode specification.
  476        * @since 1.4
  477        */
  478       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
  479   
  480       /**
  481        * Strong bidirectional character type "RLO" in the Unicode specification.
  482        * @since 1.4
  483        */
  484       public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
  485   
  486       /**
  487        * Weak bidirectional character type "PDF" in the Unicode specification.
  488        * @since 1.4
  489        */
  490       public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
  491   
  492       /**
  493        * The minimum value of a
  494        * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  495        * Unicode high-surrogate code unit</a>
  496        * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
  497        * A high-surrogate is also known as a <i>leading-surrogate</i>.
  498        *
  499        * @since 1.5
  500        */
  501       public static final char MIN_HIGH_SURROGATE = '\uD800';
  502   
  503       /**
  504        * The maximum value of a
  505        * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  506        * Unicode high-surrogate code unit</a>
  507        * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
  508        * A high-surrogate is also known as a <i>leading-surrogate</i>.
  509        *
  510        * @since 1.5
  511        */
  512       public static final char MAX_HIGH_SURROGATE = '\uDBFF';
  513   
  514       /**
  515        * The minimum value of a
  516        * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  517        * Unicode low-surrogate code unit</a>
  518        * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
  519        * A low-surrogate is also known as a <i>trailing-surrogate</i>.
  520        *
  521        * @since 1.5
  522        */
  523       public static final char MIN_LOW_SURROGATE  = '\uDC00';
  524   
  525       /**
  526        * The maximum value of a
  527        * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  528        * Unicode low-surrogate code unit</a>
  529        * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
  530        * A low-surrogate is also known as a <i>trailing-surrogate</i>.
  531        *
  532        * @since 1.5
  533        */
  534       public static final char MAX_LOW_SURROGATE  = '\uDFFF';
  535   
  536       /**
  537        * The minimum value of a Unicode surrogate code unit in the
  538        * UTF-16 encoding, constant {@code '\u005CuD800'}.
  539        *
  540        * @since 1.5
  541        */
  542       public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
  543   
  544       /**
  545        * The maximum value of a Unicode surrogate code unit in the
  546        * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
  547        *
  548        * @since 1.5
  549        */
  550       public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
  551   
  552       /**
  553        * The minimum value of a
  554        * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
  555        * Unicode supplementary code point</a>, constant {@code U+10000}.
  556        *
  557        * @since 1.5
  558        */
  559       public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
  560   
  561       /**
  562        * The minimum value of a
  563        * <a href="http://www.unicode.org/glossary/#code_point">
  564        * Unicode code point</a>, constant {@code U+0000}.
  565        *
  566        * @since 1.5
  567        */
  568       public static final int MIN_CODE_POINT = 0x000000;
  569   
  570       /**
  571        * The maximum value of a
  572        * <a href="http://www.unicode.org/glossary/#code_point">
  573        * Unicode code point</a>, constant {@code U+10FFFF}.
  574        *
  575        * @since 1.5
  576        */
  577       public static final int MAX_CODE_POINT = 0X10FFFF;
  578   
  579   
  580       /**
  581        * Instances of this class represent particular subsets of the Unicode
  582        * character set.  The only family of subsets defined in the
  583        * {@code Character} class is {@link Character.UnicodeBlock}.
  584        * Other portions of the Java API may define other subsets for their
  585        * own purposes.
  586        *
  587        * @since 1.2
  588        */
  589       public static class Subset  {
  590   
  591           private String name;
  592   
  593           /**
  594            * Constructs a new {@code Subset} instance.
  595            *
  596            * @param  name  The name of this subset
  597            * @exception NullPointerException if name is {@code null}
  598            */
  599           protected Subset(String name) {
  600               if (name == null) {
  601                   throw new NullPointerException("name");
  602               }
  603               this.name = name;
  604           }
  605   
  606           /**
  607            * Compares two {@code Subset} objects for equality.
  608            * This method returns {@code true} if and only if
  609            * {@code this} and the argument refer to the same
  610            * object; since this method is {@code final}, this
  611            * guarantee holds for all subclasses.
  612            */
  613           public final boolean equals(Object obj) {
  614               return (this == obj);
  615           }
  616   
  617           /**
  618            * Returns the standard hash code as defined by the
  619            * {@link Object#hashCode} method.  This method
  620            * is {@code final} in order to ensure that the
  621            * {@code equals} and {@code hashCode} methods will
  622            * be consistent in all subclasses.
  623            */
  624           public final int hashCode() {
  625               return super.hashCode();
  626           }
  627   
  628           /**
  629            * Returns the name of this subset.
  630            */
  631           public final String toString() {
  632               return name;
  633           }
  634       }
  635   
  636       // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
  637       // for the latest specification of Unicode Blocks.
  638   
  639       /**
  640        * A family of character subsets representing the character blocks in the
  641        * Unicode specification. Character blocks generally define characters
  642        * used for a specific script or purpose. A character is contained by
  643        * at most one Unicode block.
  644        *
  645        * @since 1.2
  646        */
  647       public static final class UnicodeBlock extends Subset {
  648   
  649           private static Map<String, UnicodeBlock> map = new HashMap<>(256);
  650   
  651           /**
  652            * Creates a UnicodeBlock with the given identifier name.
  653            * This name must be the same as the block identifier.
  654            */
  655           private UnicodeBlock(String idName) {
  656               super(idName); // Subset's constructor throws NullPointerException when idName is null
  657               map.put(idName, this); // record this instance in the static map, keyed by its identifier name
  658           }
  659   
  660           /**
  661            * Creates a UnicodeBlock with the given identifier name and
  662            * alias name.
  663            */
  664           private UnicodeBlock(String idName, String alias) {
  665               this(idName); // delegate to the single-name constructor, which already stores the idName entry
  666               map.put(alias, this); // add a second key so the alias maps to this same instance
  667           }
  668   
  669           /**
  670            * Creates a UnicodeBlock with the given identifier name and
  671            * alias names.
  672            */
  673           private UnicodeBlock(String idName, String... aliases) {
  674               this(idName);
  675               for (String alias : aliases)
  676                   map.put(alias, this);
  677           }
  678   
  679           /**
  680            * Constant for the "Basic Latin" Unicode character block.
  681            * @since 1.2
  682            */
  683           public static final UnicodeBlock  BASIC_LATIN =
  684               new UnicodeBlock("BASIC_LATIN",
  685                                "BASIC LATIN",
  686                                "BASICLATIN");
  687   
  688           /**
  689            * Constant for the "Latin-1 Supplement" Unicode character block.
  690            * @since 1.2
  691            */
  692           public static final UnicodeBlock LATIN_1_SUPPLEMENT =
  693               new UnicodeBlock("LATIN_1_SUPPLEMENT",
  694                                "LATIN-1 SUPPLEMENT",
  695                                "LATIN-1SUPPLEMENT");
  696   
  697           /**
  698            * Constant for the "Latin Extended-A" Unicode character block.
  699            * @since 1.2
  700            */
  701           public static final UnicodeBlock LATIN_EXTENDED_A =
  702               new UnicodeBlock("LATIN_EXTENDED_A",
  703                                "LATIN EXTENDED-A",
  704                                "LATINEXTENDED-A");
  705   
  706           /**
  707            * Constant for the "Latin Extended-B" Unicode character block.
  708            * @since 1.2
  709            */
  710           public static final UnicodeBlock LATIN_EXTENDED_B =
  711               new UnicodeBlock("LATIN_EXTENDED_B",
  712                                "LATIN EXTENDED-B",
  713                                "LATINEXTENDED-B");
  714   
  715           /**
  716            * Constant for the "IPA Extensions" Unicode character block.
  717            * @since 1.2
  718            */
  719           public static final UnicodeBlock IPA_EXTENSIONS =
  720               new UnicodeBlock("IPA_EXTENSIONS",
  721                                "IPA EXTENSIONS",
  722                                "IPAEXTENSIONS");
  723   
  724           /**
  725            * Constant for the "Spacing Modifier Letters" Unicode character block.
  726            * @since 1.2
  727            */
  728           public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
  729               new UnicodeBlock("SPACING_MODIFIER_LETTERS",
  730                                "SPACING MODIFIER LETTERS",
  731                                "SPACINGMODIFIERLETTERS");
  732   
  733           /**
  734            * Constant for the "Combining Diacritical Marks" Unicode character block.
  735            * @since 1.2
  736            */
  737           public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
  738               new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
  739                                "COMBINING DIACRITICAL MARKS",
  740                                "COMBININGDIACRITICALMARKS");
  741   
  742           /**
  743            * Constant for the "Greek and Coptic" Unicode character block.
  744            * <p>
  745            * This block was previously known as the "Greek" block.
  746            *
  747            * @since 1.2
  748            */
  749           public static final UnicodeBlock GREEK =
  750               new UnicodeBlock("GREEK",
  751                                "GREEK AND COPTIC",
  752                                "GREEKANDCOPTIC");
  753   
  754           /**
  755            * Constant for the "Cyrillic" Unicode character block.
  756            * @since 1.2
  757            */
  758           public static final UnicodeBlock CYRILLIC =
  759               new UnicodeBlock("CYRILLIC");
  760   
  761           /**
  762            * Constant for the "Armenian" Unicode character block.
  763            * @since 1.2
  764            */
  765           public static final UnicodeBlock ARMENIAN =
  766               new UnicodeBlock("ARMENIAN");
  767   
  768           /**
  769            * Constant for the "Hebrew" Unicode character block.
  770            * @since 1.2
  771            */
  772           public static final UnicodeBlock HEBREW =
  773               new UnicodeBlock("HEBREW");
  774   
  775           /**
  776            * Constant for the "Arabic" Unicode character block.
  777            * @since 1.2
  778            */
  779           public static final UnicodeBlock ARABIC =
  780               new UnicodeBlock("ARABIC");
  781   
  782           /**
  783            * Constant for the "Devanagari" Unicode character block.
  784            * @since 1.2
  785            */
  786           public static final UnicodeBlock DEVANAGARI =
  787               new UnicodeBlock("DEVANAGARI");
  788   
  789           /**
  790            * Constant for the "Bengali" Unicode character block.
  791            * @since 1.2
  792            */
  793           public static final UnicodeBlock BENGALI =
  794               new UnicodeBlock("BENGALI");
  795   
  796           /**
  797            * Constant for the "Gurmukhi" Unicode character block.
  798            * @since 1.2
  799            */
  800           public static final UnicodeBlock GURMUKHI =
  801               new UnicodeBlock("GURMUKHI");
  802   
  803           /**
  804            * Constant for the "Gujarati" Unicode character block.
  805            * @since 1.2
  806            */
  807           public static final UnicodeBlock GUJARATI =
  808               new UnicodeBlock("GUJARATI");
  809   
  810           /**
  811            * Constant for the "Oriya" Unicode character block.
  812            * @since 1.2
  813            */
  814           public static final UnicodeBlock ORIYA =
  815               new UnicodeBlock("ORIYA");
  816   
  817           /**
  818            * Constant for the "Tamil" Unicode character block.
  819            * @since 1.2
  820            */
  821           public static final UnicodeBlock TAMIL =
  822               new UnicodeBlock("TAMIL");
  823   
  824           /**
  825            * Constant for the "Telugu" Unicode character block.
  826            * @since 1.2
  827            */
  828           public static final UnicodeBlock TELUGU =
  829               new UnicodeBlock("TELUGU");
  830   
  831           /**
  832            * Constant for the "Kannada" Unicode character block.
  833            * @since 1.2
  834            */
  835           public static final UnicodeBlock KANNADA =
  836               new UnicodeBlock("KANNADA");
  837   
  838           /**
  839            * Constant for the "Malayalam" Unicode character block.
  840            * @since 1.2
  841            */
  842           public static final UnicodeBlock MALAYALAM =
  843               new UnicodeBlock("MALAYALAM");
  844   
  845           /**
  846            * Constant for the "Thai" Unicode character block.
  847            * @since 1.2
  848            */
  849           public static final UnicodeBlock THAI =
  850               new UnicodeBlock("THAI");
  851   
  852           /**
  853            * Constant for the "Lao" Unicode character block.
  854            * @since 1.2
  855            */
  856           public static final UnicodeBlock LAO =
  857               new UnicodeBlock("LAO");
  858   
  859           /**
  860            * Constant for the "Tibetan" Unicode character block.
  861            * @since 1.2
  862            */
  863           public static final UnicodeBlock TIBETAN =
  864               new UnicodeBlock("TIBETAN");
  865   
  866           /**
  867            * Constant for the "Georgian" Unicode character block.
  868            * @since 1.2
  869            */
  870           public static final UnicodeBlock GEORGIAN =
  871               new UnicodeBlock("GEORGIAN");
  872   
  873           /**
  874            * Constant for the "Hangul Jamo" Unicode character block.
  875            * @since 1.2
  876            */
  877           public static final UnicodeBlock HANGUL_JAMO =
  878               new UnicodeBlock("HANGUL_JAMO",
  879                                "HANGUL JAMO",
  880                                "HANGULJAMO");
  881   
  882           /**
  883            * Constant for the "Latin Extended Additional" Unicode character block.
  884            * @since 1.2
  885            */
  886           public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
  887               new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
  888                                "LATIN EXTENDED ADDITIONAL",
  889                                "LATINEXTENDEDADDITIONAL");
  890   
  891           /**
  892            * Constant for the "Greek Extended" Unicode character block.
  893            * @since 1.2
  894            */
  895           public static final UnicodeBlock GREEK_EXTENDED =
  896               new UnicodeBlock("GREEK_EXTENDED",
  897                                "GREEK EXTENDED",
  898                                "GREEKEXTENDED");
  899   
  900           /**
  901            * Constant for the "General Punctuation" Unicode character block.
  902            * @since 1.2
  903            */
  904           public static final UnicodeBlock GENERAL_PUNCTUATION =
  905               new UnicodeBlock("GENERAL_PUNCTUATION",
  906                                "GENERAL PUNCTUATION",
  907                                "GENERALPUNCTUATION");
  908   
  909           /**
  910            * Constant for the "Superscripts and Subscripts" Unicode character
  911            * block.
  912            * @since 1.2
  913            */
  914           public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
  915               new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
  916                                "SUPERSCRIPTS AND SUBSCRIPTS",
  917                                "SUPERSCRIPTSANDSUBSCRIPTS");
  918   
  919           /**
  920            * Constant for the "Currency Symbols" Unicode character block.
  921            * @since 1.2
  922            */
  923           public static final UnicodeBlock CURRENCY_SYMBOLS =
  924               new UnicodeBlock("CURRENCY_SYMBOLS",
  925                                "CURRENCY SYMBOLS",
  926                                "CURRENCYSYMBOLS");
  927   
  928           /**
  929            * Constant for the "Combining Diacritical Marks for Symbols" Unicode
  930            * character block.
  931            * <p>
  932            * This block was previously known as "Combining Marks for Symbols".
  933            * @since 1.2
  934            */
  935           public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
  936               new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
  937                                "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
  938                                "COMBININGDIACRITICALMARKSFORSYMBOLS",
  939                                "COMBINING MARKS FOR SYMBOLS",
  940                                "COMBININGMARKSFORSYMBOLS");
  941   
  942           /**
  943            * Constant for the "Letterlike Symbols" Unicode character block.
  944            * @since 1.2
  945            */
  946           public static final UnicodeBlock LETTERLIKE_SYMBOLS =
  947               new UnicodeBlock("LETTERLIKE_SYMBOLS",
  948                                "LETTERLIKE SYMBOLS",
  949                                "LETTERLIKESYMBOLS");
  950   
  951           /**
  952            * Constant for the "Number Forms" Unicode character block.
  953            * @since 1.2
  954            */
  955           public static final UnicodeBlock NUMBER_FORMS =
  956               new UnicodeBlock("NUMBER_FORMS",
  957                                "NUMBER FORMS",
  958                                "NUMBERFORMS");
  959   
  960           /**
  961            * Constant for the "Arrows" Unicode character block.
  962            * @since 1.2
  963            */
  964           public static final UnicodeBlock ARROWS =
  965               new UnicodeBlock("ARROWS");
  966   
  967           /**
  968            * Constant for the "Mathematical Operators" Unicode character block.
  969            * @since 1.2
  970            */
  971           public static final UnicodeBlock MATHEMATICAL_OPERATORS =
  972               new UnicodeBlock("MATHEMATICAL_OPERATORS",
  973                                "MATHEMATICAL OPERATORS",
  974                                "MATHEMATICALOPERATORS");
  975   
  976           /**
  977            * Constant for the "Miscellaneous Technical" Unicode character block.
  978            * @since 1.2
  979            */
  980           public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
  981               new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
  982                                "MISCELLANEOUS TECHNICAL",
  983                                "MISCELLANEOUSTECHNICAL");
  984   
  985           /**
  986            * Constant for the "Control Pictures" Unicode character block.
  987            * @since 1.2
  988            */
  989           public static final UnicodeBlock CONTROL_PICTURES =
  990               new UnicodeBlock("CONTROL_PICTURES",
  991                                "CONTROL PICTURES",
  992                                "CONTROLPICTURES");
  993   
  994           /**
  995            * Constant for the "Optical Character Recognition" Unicode character block.
  996            * @since 1.2
  997            */
  998           public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
  999               new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
 1000                                "OPTICAL CHARACTER RECOGNITION",
 1001                                "OPTICALCHARACTERRECOGNITION");
 1002   
 1003           /**
 1004            * Constant for the "Enclosed Alphanumerics" Unicode character block.
 1005            * @since 1.2
 1006            */
 1007           public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
 1008               new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
 1009                                "ENCLOSED ALPHANUMERICS",
 1010                                "ENCLOSEDALPHANUMERICS");
 1011   
 1012           /**
 1013            * Constant for the "Box Drawing" Unicode character block.
 1014            * @since 1.2
 1015            */
 1016           public static final UnicodeBlock BOX_DRAWING =
 1017               new UnicodeBlock("BOX_DRAWING",
 1018                                "BOX DRAWING",
 1019                                "BOXDRAWING");
 1020   
 1021           /**
 1022            * Constant for the "Block Elements" Unicode character block.
 1023            * @since 1.2
 1024            */
 1025           public static final UnicodeBlock BLOCK_ELEMENTS =
 1026               new UnicodeBlock("BLOCK_ELEMENTS",
 1027                                "BLOCK ELEMENTS",
 1028                                "BLOCKELEMENTS");
 1029   
 1030           /**
 1031            * Constant for the "Geometric Shapes" Unicode character block.
 1032            * @since 1.2
 1033            */
 1034           public static final UnicodeBlock GEOMETRIC_SHAPES =
 1035               new UnicodeBlock("GEOMETRIC_SHAPES",
 1036                                "GEOMETRIC SHAPES",
 1037                                "GEOMETRICSHAPES");
 1038   
 1039           /**
 1040            * Constant for the "Miscellaneous Symbols" Unicode character block.
 1041            * @since 1.2
 1042            */
 1043           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
 1044               new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
 1045                                "MISCELLANEOUS SYMBOLS",
 1046                                "MISCELLANEOUSSYMBOLS");
 1047   
 1048           /**
 1049            * Constant for the "Dingbats" Unicode character block.
 1050            * @since 1.2
 1051            */
 1052           public static final UnicodeBlock DINGBATS =
 1053               new UnicodeBlock("DINGBATS");
 1054   
 1055           /**
 1056            * Constant for the "CJK Symbols and Punctuation" Unicode character block.
 1057            * @since 1.2
 1058            */
 1059           public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
 1060               new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
 1061                                "CJK SYMBOLS AND PUNCTUATION",
 1062                                "CJKSYMBOLSANDPUNCTUATION");
 1063   
 1064           /**
 1065            * Constant for the "Hiragana" Unicode character block.
 1066            * @since 1.2
 1067            */
 1068           public static final UnicodeBlock HIRAGANA =
 1069               new UnicodeBlock("HIRAGANA");
 1070   
 1071           /**
 1072            * Constant for the "Katakana" Unicode character block.
 1073            * @since 1.2
 1074            */
 1075           public static final UnicodeBlock KATAKANA =
 1076               new UnicodeBlock("KATAKANA");
 1077   
 1078           /**
 1079            * Constant for the "Bopomofo" Unicode character block.
 1080            * @since 1.2
 1081            */
 1082           public static final UnicodeBlock BOPOMOFO =
 1083               new UnicodeBlock("BOPOMOFO");
 1084   
 1085           /**
 1086            * Constant for the "Hangul Compatibility Jamo" Unicode character block.
 1087            * @since 1.2
 1088            */
 1089           public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
 1090               new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
 1091                                "HANGUL COMPATIBILITY JAMO",
 1092                                "HANGULCOMPATIBILITYJAMO");
 1093   
 1094           /**
 1095            * Constant for the "Kanbun" Unicode character block.
 1096            * @since 1.2
 1097            */
 1098           public static final UnicodeBlock KANBUN =
 1099               new UnicodeBlock("KANBUN");
 1100   
 1101           /**
 1102            * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
 1103            * @since 1.2
 1104            */
 1105           public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
 1106               new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
 1107                                "ENCLOSED CJK LETTERS AND MONTHS",
 1108                                "ENCLOSEDCJKLETTERSANDMONTHS");
 1109   
 1110           /**
 1111            * Constant for the "CJK Compatibility" Unicode character block.
 1112            * @since 1.2
 1113            */
 1114           public static final UnicodeBlock CJK_COMPATIBILITY =
 1115               new UnicodeBlock("CJK_COMPATIBILITY",
 1116                                "CJK COMPATIBILITY",
 1117                                "CJKCOMPATIBILITY");
 1118   
 1119           /**
 1120            * Constant for the "CJK Unified Ideographs" Unicode character block.
 1121            * @since 1.2
 1122            */
 1123           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
 1124               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
 1125                                "CJK UNIFIED IDEOGRAPHS",
 1126                                "CJKUNIFIEDIDEOGRAPHS");
 1127   
 1128           /**
 1129            * Constant for the "Hangul Syllables" Unicode character block.
 1130            * @since 1.2
 1131            */
 1132           public static final UnicodeBlock HANGUL_SYLLABLES =
 1133               new UnicodeBlock("HANGUL_SYLLABLES",
 1134                                "HANGUL SYLLABLES",
 1135                                "HANGULSYLLABLES");
 1136   
 1137           /**
 1138            * Constant for the "Private Use Area" Unicode character block.
 1139            * @since 1.2
 1140            */
 1141           public static final UnicodeBlock PRIVATE_USE_AREA =
 1142               new UnicodeBlock("PRIVATE_USE_AREA",
 1143                                "PRIVATE USE AREA",
 1144                                "PRIVATEUSEAREA");
 1145   
 1146           /**
 1147            * Constant for the "CJK Compatibility Ideographs" Unicode character
 1148            * block.
 1149            * @since 1.2
 1150            */
 1151           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
 1152               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
 1153                                "CJK COMPATIBILITY IDEOGRAPHS",
 1154                                "CJKCOMPATIBILITYIDEOGRAPHS");
 1155   
 1156           /**
 1157            * Constant for the "Alphabetic Presentation Forms" Unicode character block.
 1158            * @since 1.2
 1159            */
 1160           public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
 1161               new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
 1162                                "ALPHABETIC PRESENTATION FORMS",
 1163                                "ALPHABETICPRESENTATIONFORMS");
 1164   
 1165           /**
 1166            * Constant for the "Arabic Presentation Forms-A" Unicode character
 1167            * block.
 1168            * @since 1.2
 1169            */
 1170           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
 1171               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
 1172                                "ARABIC PRESENTATION FORMS-A",
 1173                                "ARABICPRESENTATIONFORMS-A");
 1174   
 1175           /**
 1176            * Constant for the "Combining Half Marks" Unicode character block.
 1177            * @since 1.2
 1178            */
 1179           public static final UnicodeBlock COMBINING_HALF_MARKS =
 1180               new UnicodeBlock("COMBINING_HALF_MARKS",
 1181                                "COMBINING HALF MARKS",
 1182                                "COMBININGHALFMARKS");
 1183   
 1184           /**
 1185            * Constant for the "CJK Compatibility Forms" Unicode character block.
 1186            * @since 1.2
 1187            */
 1188           public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
 1189               new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
 1190                                "CJK COMPATIBILITY FORMS",
 1191                                "CJKCOMPATIBILITYFORMS");
 1192   
 1193           /**
 1194            * Constant for the "Small Form Variants" Unicode character block.
 1195            * @since 1.2
 1196            */
 1197           public static final UnicodeBlock SMALL_FORM_VARIANTS =
 1198               new UnicodeBlock("SMALL_FORM_VARIANTS",
 1199                                "SMALL FORM VARIANTS",
 1200                                "SMALLFORMVARIANTS");
 1201   
 1202           /**
 1203            * Constant for the "Arabic Presentation Forms-B" Unicode character block.
 1204            * @since 1.2
 1205            */
 1206           public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
 1207               new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
 1208                                "ARABIC PRESENTATION FORMS-B",
 1209                                "ARABICPRESENTATIONFORMS-B");
 1210   
 1211           /**
 1212            * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
 1213            * block.
 1214            * @since 1.2
 1215            */
 1216           public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
 1217               new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
 1218                                "HALFWIDTH AND FULLWIDTH FORMS",
 1219                                "HALFWIDTHANDFULLWIDTHFORMS");
 1220   
 1221           /**
 1222            * Constant for the "Specials" Unicode character block.
 1223            * @since 1.2
 1224            */
 1225           public static final UnicodeBlock SPECIALS =
 1226               new UnicodeBlock("SPECIALS");
 1227   
 1228           /** Constant for the surrogates area; superseded by the three constants named below.
 1229            * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
 1230            *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
 1231            *             {@link #LOW_SURROGATES}. These new constants match
 1232            *             the block definitions of the Unicode Standard.
 1233            *             The {@link #of(char)} and {@link #of(int)} methods
 1234            *             return the new constants, not {@code SURROGATES_AREA}.
 1235            */
 1236           @Deprecated
 1237           public static final UnicodeBlock SURROGATES_AREA =
 1238               new UnicodeBlock("SURROGATES_AREA");
 1239   
 1240           /**
 1241            * Constant for the "Syriac" Unicode character block.
 1242            * @since 1.4
 1243            */
 1244           public static final UnicodeBlock SYRIAC =
 1245               new UnicodeBlock("SYRIAC");
 1246   
 1247           /**
 1248            * Constant for the "Thaana" Unicode character block.
 1249            * @since 1.4
 1250            */
 1251           public static final UnicodeBlock THAANA =
 1252               new UnicodeBlock("THAANA");
 1253   
 1254           /**
 1255            * Constant for the "Sinhala" Unicode character block.
 1256            * @since 1.4
 1257            */
 1258           public static final UnicodeBlock SINHALA =
 1259               new UnicodeBlock("SINHALA");
 1260   
 1261           /**
 1262            * Constant for the "Myanmar" Unicode character block.
 1263            * @since 1.4
 1264            */
 1265           public static final UnicodeBlock MYANMAR =
 1266               new UnicodeBlock("MYANMAR");
 1267   
 1268           /**
 1269            * Constant for the "Ethiopic" Unicode character block.
 1270            * @since 1.4
 1271            */
 1272           public static final UnicodeBlock ETHIOPIC =
 1273               new UnicodeBlock("ETHIOPIC");
 1274   
 1275           /**
 1276            * Constant for the "Cherokee" Unicode character block.
 1277            * @since 1.4
 1278            */
 1279           public static final UnicodeBlock CHEROKEE =
 1280               new UnicodeBlock("CHEROKEE");
 1281   
 1282           /**
 1283            * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
 1284            * @since 1.4
 1285            */
 1286           public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
 1287               new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
 1288                                "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
 1289                                "UNIFIEDCANADIANABORIGINALSYLLABICS");
 1290   
 1291           /**
 1292            * Constant for the "Ogham" Unicode character block.
 1293            * @since 1.4
 1294            */
 1295           public static final UnicodeBlock OGHAM =
 1296               new UnicodeBlock("OGHAM");
 1297   
 1298           /**
 1299            * Constant for the "Runic" Unicode character block.
 1300            * @since 1.4
 1301            */
 1302           public static final UnicodeBlock RUNIC =
 1303               new UnicodeBlock("RUNIC");
 1304   
 1305           /**
 1306            * Constant for the "Khmer" Unicode character block.
 1307            * @since 1.4
 1308            */
 1309           public static final UnicodeBlock KHMER =
 1310               new UnicodeBlock("KHMER");
 1311   
 1312           /**
 1313            * Constant for the "Mongolian" Unicode character block.
 1314            * @since 1.4
 1315            */
 1316           public static final UnicodeBlock MONGOLIAN =
 1317               new UnicodeBlock("MONGOLIAN");
 1318   
 1319           /**
 1320            * Constant for the "Braille Patterns" Unicode character block.
 1321            * @since 1.4
 1322            */
 1323           public static final UnicodeBlock BRAILLE_PATTERNS =
 1324               new UnicodeBlock("BRAILLE_PATTERNS",
 1325                                "BRAILLE PATTERNS",
 1326                                "BRAILLEPATTERNS");
 1327   
 1328           /**
 1329            * Constant for the "CJK Radicals Supplement" Unicode character block.
 1330            * @since 1.4
 1331            */
 1332           public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
 1333               new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
 1334                                "CJK RADICALS SUPPLEMENT",
 1335                                "CJKRADICALSSUPPLEMENT");
 1336   
 1337           /**
 1338            * Constant for the "Kangxi Radicals" Unicode character block.
 1339            * @since 1.4
 1340            */
 1341           public static final UnicodeBlock KANGXI_RADICALS =
 1342               new UnicodeBlock("KANGXI_RADICALS",
 1343                                "KANGXI RADICALS",
 1344                                "KANGXIRADICALS");
 1345   
 1346           /**
 1347            * Constant for the "Ideographic Description Characters" Unicode character block.
 1348            * @since 1.4
 1349            */
 1350           public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
 1351               new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
 1352                                "IDEOGRAPHIC DESCRIPTION CHARACTERS",
 1353                                "IDEOGRAPHICDESCRIPTIONCHARACTERS");
 1354   
 1355           /**
 1356            * Constant for the "Bopomofo Extended" Unicode character block.
 1357            * @since 1.4
 1358            */
 1359           public static final UnicodeBlock BOPOMOFO_EXTENDED =
 1360               new UnicodeBlock("BOPOMOFO_EXTENDED",
 1361                                "BOPOMOFO EXTENDED",
 1362                                "BOPOMOFOEXTENDED");
 1363   
 1364           /**
 1365            * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
 1366            * @since 1.4
 1367            */
 1368           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
 1369               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
 1370                                "CJK UNIFIED IDEOGRAPHS EXTENSION A",
 1371                                "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
 1372   
 1373           /**
 1374            * Constant for the "Yi Syllables" Unicode character block.
 1375            * @since 1.4
 1376            */
 1377           public static final UnicodeBlock YI_SYLLABLES =
 1378               new UnicodeBlock("YI_SYLLABLES",
 1379                                "YI SYLLABLES",
 1380                                "YISYLLABLES");
 1381   
 1382           /**
 1383            * Constant for the "Yi Radicals" Unicode character block.
 1384            * @since 1.4
 1385            */
 1386           public static final UnicodeBlock YI_RADICALS =
 1387               new UnicodeBlock("YI_RADICALS",
 1388                                "YI RADICALS",
 1389                                "YIRADICALS");
 1390   
 1391           /**
 1392            * Constant for the "Cyrillic Supplementary" (also "Cyrillic Supplement") Unicode character block.
 1393            * @since 1.5
 1394            */
 1395           public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
 1396               new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
 1397                                "CYRILLIC SUPPLEMENTARY",
 1398                                "CYRILLICSUPPLEMENTARY",
 1399                                "CYRILLIC SUPPLEMENT",
 1400                                "CYRILLICSUPPLEMENT");
 1401   
 1402           /**
 1403            * Constant for the "Tagalog" Unicode character block.
 1404            * @since 1.5
 1405            */
 1406           public static final UnicodeBlock TAGALOG =
 1407               new UnicodeBlock("TAGALOG");
 1408   
 1409           /**
 1410            * Constant for the "Hanunoo" Unicode character block.
 1411            * @since 1.5
 1412            */
 1413           public static final UnicodeBlock HANUNOO =
 1414               new UnicodeBlock("HANUNOO");
 1415   
 1416           /**
 1417            * Constant for the "Buhid" Unicode character block.
 1418            * @since 1.5
 1419            */
 1420           public static final UnicodeBlock BUHID =
 1421               new UnicodeBlock("BUHID");
 1422   
 1423           /**
 1424            * Constant for the "Tagbanwa" Unicode character block.
 1425            * @since 1.5
 1426            */
 1427           public static final UnicodeBlock TAGBANWA =
 1428               new UnicodeBlock("TAGBANWA");
 1429   
 1430           /**
 1431            * Constant for the "Limbu" Unicode character block.
 1432            * @since 1.5
 1433            */
 1434           public static final UnicodeBlock LIMBU =
 1435               new UnicodeBlock("LIMBU");
 1436   
 1437           /**
 1438            * Constant for the "Tai Le" Unicode character block.
 1439            * @since 1.5
 1440            */
 1441           public static final UnicodeBlock TAI_LE =
 1442               new UnicodeBlock("TAI_LE",
 1443                                "TAI LE",
 1444                                "TAILE");
 1445   
 1446           /**
 1447            * Constant for the "Khmer Symbols" Unicode character block.
 1448            * @since 1.5
 1449            */
 1450           public static final UnicodeBlock KHMER_SYMBOLS =
 1451               new UnicodeBlock("KHMER_SYMBOLS",
 1452                                "KHMER SYMBOLS",
 1453                                "KHMERSYMBOLS");
 1454   
 1455           /**
 1456            * Constant for the "Phonetic Extensions" Unicode character block.
 1457            * @since 1.5
 1458            */
 1459           public static final UnicodeBlock PHONETIC_EXTENSIONS =
 1460               new UnicodeBlock("PHONETIC_EXTENSIONS",
 1461                                "PHONETIC EXTENSIONS",
 1462                                "PHONETICEXTENSIONS");
 1463   
 1464           /**
 1465            * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
 1466            * @since 1.5
 1467            */
 1468           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
 1469               new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
 1470                                "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
 1471                                "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
 1472   
 1473           /**
 1474            * Constant for the "Supplemental Arrows-A" Unicode character block.
 1475            * @since 1.5
 1476            */
 1477           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
 1478               new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
 1479                                "SUPPLEMENTAL ARROWS-A",
 1480                                "SUPPLEMENTALARROWS-A");
 1481   
 1482           /**
 1483            * Constant for the "Supplemental Arrows-B" Unicode character block.
 1484            * @since 1.5
 1485            */
 1486           public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
 1487               new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
 1488                                "SUPPLEMENTAL ARROWS-B",
 1489                                "SUPPLEMENTALARROWS-B");
 1490           // Each call passes the identifier, the upper-cased block name, and that name without spaces.
 1491           /**
 1492            * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
 1493            * character block.
 1494            * @since 1.5
 1495            */
 1496           public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
 1497               new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
 1498                                "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
 1499                                "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
 1500   
 1501           /**
 1502            * Constant for the "Supplemental Mathematical Operators" Unicode
 1503            * character block.
 1504            * @since 1.5
 1505            */
 1506           public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
 1507               new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
 1508                                "SUPPLEMENTAL MATHEMATICAL OPERATORS",
 1509                                "SUPPLEMENTALMATHEMATICALOPERATORS");
 1510   
 1511           /**
 1512            * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
 1513            * block.
 1514            * @since 1.5
 1515            */
 1516           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
 1517               new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
 1518                                "MISCELLANEOUS SYMBOLS AND ARROWS",
 1519                                "MISCELLANEOUSSYMBOLSANDARROWS");
 1520   
 1521           /**
 1522            * Constant for the "Katakana Phonetic Extensions" Unicode character
 1523            * block.
 1524            * @since 1.5
 1525            */
 1526           public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
 1527               new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
 1528                                "KATAKANA PHONETIC EXTENSIONS",
 1529                                "KATAKANAPHONETICEXTENSIONS");
 1530   
 1531           /**
 1532            * Constant for the "Yijing Hexagram Symbols" Unicode character block.
 1533            * @since 1.5
 1534            */
 1535           public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
 1536               new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
 1537                                "YIJING HEXAGRAM SYMBOLS",
 1538                                "YIJINGHEXAGRAMSYMBOLS");
 1539   
 1540           /**
 1541            * Constant for the "Variation Selectors" Unicode character block.
 1542            * @since 1.5
 1543            */
 1544           public static final UnicodeBlock VARIATION_SELECTORS =
 1545               new UnicodeBlock("VARIATION_SELECTORS",
 1546                                "VARIATION SELECTORS",
 1547                                "VARIATIONSELECTORS");
 1548   
 1549           /**
 1550            * Constant for the "Linear B Syllabary" Unicode character block.
 1551            * @since 1.5
 1552            */
 1553           public static final UnicodeBlock LINEAR_B_SYLLABARY =
 1554               new UnicodeBlock("LINEAR_B_SYLLABARY",
 1555                                "LINEAR B SYLLABARY",
 1556                                "LINEARBSYLLABARY");
 1557   
 1558           /**
 1559            * Constant for the "Linear B Ideograms" Unicode character block.
 1560            * @since 1.5
 1561            */
 1562           public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
 1563               new UnicodeBlock("LINEAR_B_IDEOGRAMS",
 1564                                "LINEAR B IDEOGRAMS",
 1565                                "LINEARBIDEOGRAMS");
 1566   
 1567           /**
 1568            * Constant for the "Aegean Numbers" Unicode character block.
 1569            * @since 1.5
 1570            */
 1571           public static final UnicodeBlock AEGEAN_NUMBERS =
 1572               new UnicodeBlock("AEGEAN_NUMBERS",
 1573                                "AEGEAN NUMBERS",
 1574                                "AEGEANNUMBERS");
 1575   
 1576           /**
 1577            * Constant for the "Old Italic" Unicode character block.
 1578            * @since 1.5
 1579            */
 1580           public static final UnicodeBlock OLD_ITALIC =
 1581               new UnicodeBlock("OLD_ITALIC",
 1582                                "OLD ITALIC",
 1583                                "OLDITALIC");
 1584           // Gothic through Osmanya have one-word names, so the identifier is the only string passed.
 1585           /**
 1586            * Constant for the "Gothic" Unicode character block.
 1587            * @since 1.5
 1588            */
 1589           public static final UnicodeBlock GOTHIC =
 1590               new UnicodeBlock("GOTHIC");
 1591   
 1592           /**
 1593            * Constant for the "Ugaritic" Unicode character block.
 1594            * @since 1.5
 1595            */
 1596           public static final UnicodeBlock UGARITIC =
 1597               new UnicodeBlock("UGARITIC");
 1598   
 1599           /**
 1600            * Constant for the "Deseret" Unicode character block.
 1601            * @since 1.5
 1602            */
 1603           public static final UnicodeBlock DESERET =
 1604               new UnicodeBlock("DESERET");
 1605   
 1606           /**
 1607            * Constant for the "Shavian" Unicode character block.
 1608            * @since 1.5
 1609            */
 1610           public static final UnicodeBlock SHAVIAN =
 1611               new UnicodeBlock("SHAVIAN");
 1612   
 1613           /**
 1614            * Constant for the "Osmanya" Unicode character block.
 1615            * @since 1.5
 1616            */
 1617           public static final UnicodeBlock OSMANYA =
 1618               new UnicodeBlock("OSMANYA");
 1619   
 1620           /**
 1621            * Constant for the "Cypriot Syllabary" Unicode character block.
 1622            * @since 1.5
 1623            */
 1624           public static final UnicodeBlock CYPRIOT_SYLLABARY =
 1625               new UnicodeBlock("CYPRIOT_SYLLABARY",
 1626                                "CYPRIOT SYLLABARY",
 1627                                "CYPRIOTSYLLABARY");
 1628   
 1629           /**
 1630            * Constant for the "Byzantine Musical Symbols" Unicode character block.
 1631            * @since 1.5
 1632            */
 1633           public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
 1634               new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
 1635                                "BYZANTINE MUSICAL SYMBOLS",
 1636                                "BYZANTINEMUSICALSYMBOLS");
 1637   
 1638           /**
 1639            * Constant for the "Musical Symbols" Unicode character block.
 1640            * @since 1.5
 1641            */
 1642           public static final UnicodeBlock MUSICAL_SYMBOLS =
 1643               new UnicodeBlock("MUSICAL_SYMBOLS",
 1644                                "MUSICAL SYMBOLS",
 1645                                "MUSICALSYMBOLS");
 1646   
 1647           /**
 1648            * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
 1649            * @since 1.5
 1650            */
 1651           public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
 1652               new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
 1653                                "TAI XUAN JING SYMBOLS",
 1654                                "TAIXUANJINGSYMBOLS");
 1655   
 1656           /**
 1657            * Constant for the "Mathematical Alphanumeric Symbols" Unicode
 1658            * character block.
 1659            * @since 1.5
 1660            */
 1661           public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
 1662               new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
 1663                                "MATHEMATICAL ALPHANUMERIC SYMBOLS",
 1664                                "MATHEMATICALALPHANUMERICSYMBOLS");
 1665   
 1666           /**
 1667            * Constant for the "CJK Unified Ideographs Extension B" Unicode
 1668            * character block.
 1669            * @since 1.5
 1670            */
 1671           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
 1672               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
 1673                                "CJK UNIFIED IDEOGRAPHS EXTENSION B",
 1674                                "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
 1675   
 1676           /**
 1677            * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
 1678            * @since 1.5
 1679            */
 1680           public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
 1681               new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
 1682                                "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
 1683                                "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
 1684   
 1685           /**
 1686            * Constant for the "Tags" Unicode character block.
 1687            * @since 1.5
 1688            */
 1689           public static final UnicodeBlock TAGS =
 1690               new UnicodeBlock("TAGS");
 1691   
 1692           /**
 1693            * Constant for the "Variation Selectors Supplement" Unicode character
 1694            * block.
 1695            * @since 1.5
 1696            */
 1697           public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
 1698               new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
 1699                                "VARIATION SELECTORS SUPPLEMENT",
 1700                                "VARIATIONSELECTORSSUPPLEMENT");
 1701           // "AREA-A"/"AREA-B": the hyphen stays in both name strings; only the identifier uses '_'.
 1702           /**
 1703            * Constant for the "Supplementary Private Use Area-A" Unicode character
 1704            * block.
 1705            * @since 1.5
 1706            */
 1707           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
 1708               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
 1709                                "SUPPLEMENTARY PRIVATE USE AREA-A",
 1710                                "SUPPLEMENTARYPRIVATEUSEAREA-A");
 1711   
 1712           /**
 1713            * Constant for the "Supplementary Private Use Area-B" Unicode character
 1714            * block.
 1715            * @since 1.5
 1716            */
 1717           public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
 1718               new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
 1719                                "SUPPLEMENTARY PRIVATE USE AREA-B",
 1720                                "SUPPLEMENTARYPRIVATEUSEAREA-B");
 1721   
 1722           /**
 1723            * Constant for the "High Surrogates" Unicode character block.
 1724            * This block represents code point values in the high surrogate
 1725            * range: U+D800 through U+DB7F.
 1726            *
 1727            * @since 1.5
 1728            */
 1729           public static final UnicodeBlock HIGH_SURROGATES =
 1730               new UnicodeBlock("HIGH_SURROGATES",
 1731                                "HIGH SURROGATES",
 1732                                "HIGHSURROGATES");
 1733   
 1734           /**
 1735            * Constant for the "High Private Use Surrogates" Unicode character
 1736            * block.
 1737            * This block represents code point values in the private use high
 1738            * surrogate range: U+DB80 through U+DBFF.
 1739            *
 1740            * @since 1.5
 1741            */
 1742           public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
 1743               new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
 1744                                "HIGH PRIVATE USE SURROGATES",
 1745                                "HIGHPRIVATEUSESURROGATES");
 1746   
 1747           /**
 1748            * Constant for the "Low Surrogates" Unicode character block.
 1749            * This block represents code point values in the low surrogate
 1750            * range: U+DC00 through U+DFFF.
 1751            *
 1752            * @since 1.5
 1753            */
 1754           public static final UnicodeBlock LOW_SURROGATES =
 1755               new UnicodeBlock("LOW_SURROGATES",
 1756                                "LOW SURROGATES",
 1757                                "LOWSURROGATES");
 1758           // First constant tagged @since 1.7; the surrogate constants just above are @since 1.5.
 1759           /**
 1760            * Constant for the "Arabic Supplement" Unicode character block.
 1761            * @since 1.7
 1762            */
 1763           public static final UnicodeBlock ARABIC_SUPPLEMENT =
 1764               new UnicodeBlock("ARABIC_SUPPLEMENT",
 1765                                "ARABIC SUPPLEMENT",
 1766                                "ARABICSUPPLEMENT");
 1767   
 1768           /**
 1769            * Constant for the "NKo" Unicode character block.
 1770            * @since 1.7
 1771            */
 1772           public static final UnicodeBlock NKO =
 1773               new UnicodeBlock("NKO");
 1774   
 1775           /**
 1776            * Constant for the "Samaritan" Unicode character block.
 1777            * @since 1.7
 1778            */
 1779           public static final UnicodeBlock SAMARITAN =
 1780               new UnicodeBlock("SAMARITAN");
 1781   
 1782           /**
 1783            * Constant for the "Mandaic" Unicode character block.
 1784            * @since 1.7
 1785            */
 1786           public static final UnicodeBlock MANDAIC =
 1787               new UnicodeBlock("MANDAIC");
 1788   
 1789           /**
 1790            * Constant for the "Ethiopic Supplement" Unicode character block.
 1791            * @since 1.7
 1792            */
 1793           public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
 1794               new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
 1795                                "ETHIOPIC SUPPLEMENT",
 1796                                "ETHIOPICSUPPLEMENT");
 1797   
 1798           /**
 1799            * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
 1800            * Unicode character block.
 1801            * @since 1.7
 1802            */
 1803           public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
 1804               new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
 1805                                "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
 1806                                "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
 1807   
 1808           /**
 1809            * Constant for the "New Tai Lue" Unicode character block.
 1810            * @since 1.7
 1811            */
 1812           public static final UnicodeBlock NEW_TAI_LUE =
 1813               new UnicodeBlock("NEW_TAI_LUE",
 1814                                "NEW TAI LUE",
 1815                                "NEWTAILUE");
 1816   
 1817           /**
 1818            * Constant for the "Buginese" Unicode character block.
 1819            * @since 1.7
 1820            */
 1821           public static final UnicodeBlock BUGINESE =
 1822               new UnicodeBlock("BUGINESE");
 1823   
 1824           /**
 1825            * Constant for the "Tai Tham" Unicode character block.
 1826            * @since 1.7
 1827            */
 1828           public static final UnicodeBlock TAI_THAM =
 1829               new UnicodeBlock("TAI_THAM",
 1830                                "TAI THAM",
 1831                                "TAITHAM");
 1832   
 1833           /**
 1834            * Constant for the "Balinese" Unicode character block.
 1835            * @since 1.7
 1836            */
 1837           public static final UnicodeBlock BALINESE =
 1838               new UnicodeBlock("BALINESE");
 1839   
 1840           /**
 1841            * Constant for the "Sundanese" Unicode character block.
 1842            * @since 1.7
 1843            */
 1844           public static final UnicodeBlock SUNDANESE =
 1845               new UnicodeBlock("SUNDANESE");
 1846   
 1847           /**
 1848            * Constant for the "Batak" Unicode character block.
 1849            * @since 1.7
 1850            */
 1851           public static final UnicodeBlock BATAK =
 1852               new UnicodeBlock("BATAK");
 1853   
 1854           /**
 1855            * Constant for the "Lepcha" Unicode character block.
 1856            * @since 1.7
 1857            */
 1858           public static final UnicodeBlock LEPCHA =
 1859               new UnicodeBlock("LEPCHA");
 1860   
 1861           /**
 1862            * Constant for the "Ol Chiki" Unicode character block.
 1863            * @since 1.7
 1864            */
 1865           public static final UnicodeBlock OL_CHIKI =
 1866               new UnicodeBlock("OL_CHIKI",
 1867                                "OL CHIKI",
 1868                                "OLCHIKI");
 1869   
 1870           /**
 1871            * Constant for the "Vedic Extensions" Unicode character block.
 1872            * @since 1.7
 1873            */
 1874           public static final UnicodeBlock VEDIC_EXTENSIONS =
 1875               new UnicodeBlock("VEDIC_EXTENSIONS",
 1876                                "VEDIC EXTENSIONS",
 1877                                "VEDICEXTENSIONS");
 1878   
 1879           /**
 1880            * Constant for the "Phonetic Extensions Supplement" Unicode character
 1881            * block.
 1882            * @since 1.7
 1883            */
 1884           public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
 1885               new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
 1886                                "PHONETIC EXTENSIONS SUPPLEMENT",
 1887                                "PHONETICEXTENSIONSSUPPLEMENT");
 1888   
 1889           /**
 1890            * Constant for the "Combining Diacritical Marks Supplement" Unicode
 1891            * character block.
 1892            * @since 1.7
 1893            */
 1894           public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
 1895               new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
 1896                                "COMBINING DIACRITICAL MARKS SUPPLEMENT",
 1897                                "COMBININGDIACRITICALMARKSSUPPLEMENT");
 1898   
 1899           /**
 1900            * Constant for the "Glagolitic" Unicode character block.
 1901            * @since 1.7
 1902            */
 1903           public static final UnicodeBlock GLAGOLITIC =
 1904               new UnicodeBlock("GLAGOLITIC");
 1905           // Hyphenated name: "-C" is kept in both name strings, while the identifier spells it "_C".
 1906           /**
 1907            * Constant for the "Latin Extended-C" Unicode character block.
 1908            * @since 1.7
 1909            */
 1910           public static final UnicodeBlock LATIN_EXTENDED_C =
 1911               new UnicodeBlock("LATIN_EXTENDED_C",
 1912                                "LATIN EXTENDED-C",
 1913                                "LATINEXTENDED-C");
 1914   
 1915           /**
 1916            * Constant for the "Coptic" Unicode character block.
 1917            * @since 1.7
 1918            */
 1919           public static final UnicodeBlock COPTIC =
 1920               new UnicodeBlock("COPTIC");
 1921   
 1922           /**
 1923            * Constant for the "Georgian Supplement" Unicode character block.
 1924            * @since 1.7
 1925            */
 1926           public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
 1927               new UnicodeBlock("GEORGIAN_SUPPLEMENT",
 1928                                "GEORGIAN SUPPLEMENT",
 1929                                "GEORGIANSUPPLEMENT");
 1930   
 1931           /**
 1932            * Constant for the "Tifinagh" Unicode character block.
 1933            * @since 1.7
 1934            */
 1935           public static final UnicodeBlock TIFINAGH =
 1936               new UnicodeBlock("TIFINAGH");
 1937   
 1938           /**
 1939            * Constant for the "Ethiopic Extended" Unicode character block.
 1940            * @since 1.7
 1941            */
 1942           public static final UnicodeBlock ETHIOPIC_EXTENDED =
 1943               new UnicodeBlock("ETHIOPIC_EXTENDED",
 1944                                "ETHIOPIC EXTENDED",
 1945                                "ETHIOPICEXTENDED");
 1946   
 1947           /**
 1948            * Constant for the "Cyrillic Extended-A" Unicode character block.
 1949            * @since 1.7
 1950            */
 1951           public static final UnicodeBlock CYRILLIC_EXTENDED_A =
 1952               new UnicodeBlock("CYRILLIC_EXTENDED_A",
 1953                                "CYRILLIC EXTENDED-A",
 1954                                "CYRILLICEXTENDED-A");
 1955   
 1956           /**
 1957            * Constant for the "Supplemental Punctuation" Unicode character block.
 1958            * @since 1.7
 1959            */
 1960           public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
 1961               new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
 1962                                "SUPPLEMENTAL PUNCTUATION",
 1963                                "SUPPLEMENTALPUNCTUATION");
 1964   
 1965           /**
 1966            * Constant for the "CJK Strokes" Unicode character block.
 1967            * @since 1.7
 1968            */
 1969           public static final UnicodeBlock CJK_STROKES =
 1970               new UnicodeBlock("CJK_STROKES",
 1971                                "CJK STROKES",
 1972                                "CJKSTROKES");
 1973   
 1974           /**
 1975            * Constant for the "Lisu" Unicode character block.
 1976            * @since 1.7
 1977            */
 1978           public static final UnicodeBlock LISU =
 1979               new UnicodeBlock("LISU");
 1980   
 1981           /**
 1982            * Constant for the "Vai" Unicode character block.
 1983            * @since 1.7
 1984            */
 1985           public static final UnicodeBlock VAI =
 1986               new UnicodeBlock("VAI");
 1987   
 1988           /**
 1989            * Constant for the "Cyrillic Extended-B" Unicode character block.
 1990            * @since 1.7
 1991            */
 1992           public static final UnicodeBlock CYRILLIC_EXTENDED_B =
 1993               new UnicodeBlock("CYRILLIC_EXTENDED_B",
 1994                                "CYRILLIC EXTENDED-B",
 1995                                "CYRILLICEXTENDED-B");
 1996   
 1997           /**
 1998            * Constant for the "Bamum" Unicode character block.
 1999            * @since 1.7
 2000            */
 2001           public static final UnicodeBlock BAMUM =
 2002               new UnicodeBlock("BAMUM");
 2003   
 2004           /**
 2005            * Constant for the "Modifier Tone Letters" Unicode character block.
 2006            * @since 1.7
 2007            */
 2008           public static final UnicodeBlock MODIFIER_TONE_LETTERS =
 2009               new UnicodeBlock("MODIFIER_TONE_LETTERS",
 2010                                "MODIFIER TONE LETTERS",
 2011                                "MODIFIERTONELETTERS");
 2012   
 2013           /**
 2014            * Constant for the "Latin Extended-D" Unicode character block.
 2015            * @since 1.7
 2016            */
 2017           public static final UnicodeBlock LATIN_EXTENDED_D =
 2018               new UnicodeBlock("LATIN_EXTENDED_D",
 2019                                "LATIN EXTENDED-D",
 2020                                "LATINEXTENDED-D");
 2021   
 2022           /**
 2023            * Constant for the "Syloti Nagri" Unicode character block.
 2024            * @since 1.7
 2025            */
 2026           public static final UnicodeBlock SYLOTI_NAGRI =
 2027               new UnicodeBlock("SYLOTI_NAGRI",
 2028                                "SYLOTI NAGRI",
 2029                                "SYLOTINAGRI");
 2030   
 2031           /**
 2032            * Constant for the "Common Indic Number Forms" Unicode character block.
 2033            * @since 1.7
 2034            */
 2035           public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
 2036               new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
 2037                                "COMMON INDIC NUMBER FORMS",
 2038                                "COMMONINDICNUMBERFORMS");
 2039           // Two strings only: "PHAGS-PA" has no space to strip, so a third form would repeat the second.
 2040           /**
 2041            * Constant for the "Phags-pa" Unicode character block.
 2042            * @since 1.7
 2043            */
 2044           public static final UnicodeBlock PHAGS_PA =
 2045               new UnicodeBlock("PHAGS_PA",
 2046                                "PHAGS-PA");
 2047   
 2048           /**
 2049            * Constant for the "Saurashtra" Unicode character block.
 2050            * @since 1.7
 2051            */
 2052           public static final UnicodeBlock SAURASHTRA =
 2053               new UnicodeBlock("SAURASHTRA");
 2054   
 2055           /**
 2056            * Constant for the "Devanagari Extended" Unicode character block.
 2057            * @since 1.7
 2058            */
 2059           public static final UnicodeBlock DEVANAGARI_EXTENDED =
 2060               new UnicodeBlock("DEVANAGARI_EXTENDED",
 2061                                "DEVANAGARI EXTENDED",
 2062                                "DEVANAGARIEXTENDED");
 2063   
 2064           /**
 2065            * Constant for the "Kayah Li" Unicode character block.
 2066            * @since 1.7
 2067            */
 2068           public static final UnicodeBlock KAYAH_LI =
 2069               new UnicodeBlock("KAYAH_LI",
 2070                                "KAYAH LI",
 2071                                "KAYAHLI");
 2072   
 2073           /**
 2074            * Constant for the "Rejang" Unicode character block.
 2075            * @since 1.7
 2076            */
 2077           public static final UnicodeBlock REJANG =
 2078               new UnicodeBlock("REJANG");
 2079   
 2080           /**
 2081            * Constant for the "Hangul Jamo Extended-A" Unicode character block.
 2082            * @since 1.7
 2083            */
 2084           public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
 2085               new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
 2086                                "HANGUL JAMO EXTENDED-A",
 2087                                "HANGULJAMOEXTENDED-A");
 2088   
 2089           /**
 2090            * Constant for the "Javanese" Unicode character block.
 2091            * @since 1.7
 2092            */
 2093           public static final UnicodeBlock JAVANESE =
 2094               new UnicodeBlock("JAVANESE");
 2095   
 2096           /**
 2097            * Constant for the "Cham" Unicode character block.
 2098            * @since 1.7
 2099            */
 2100           public static final UnicodeBlock CHAM =
 2101               new UnicodeBlock("CHAM");
 2102   
 2103           /**
 2104            * Constant for the "Myanmar Extended-A" Unicode character block.
 2105            * @since 1.7
 2106            */
 2107           public static final UnicodeBlock MYANMAR_EXTENDED_A =
 2108               new UnicodeBlock("MYANMAR_EXTENDED_A",
 2109                                "MYANMAR EXTENDED-A",
 2110                                "MYANMAREXTENDED-A");
 2111   
 2112           /**
 2113            * Constant for the "Tai Viet" Unicode character block.
 2114            * @since 1.7
 2115            */
 2116           public static final UnicodeBlock TAI_VIET =
 2117               new UnicodeBlock("TAI_VIET",
 2118                                "TAI VIET",
 2119                                "TAIVIET");
 2120   
 2121           /**
 2122            * Constant for the "Ethiopic Extended-A" Unicode character block.
 2123            * @since 1.7
 2124            */
 2125           public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
 2126               new UnicodeBlock("ETHIOPIC_EXTENDED_A",
 2127                                "ETHIOPIC EXTENDED-A",
 2128                                "ETHIOPICEXTENDED-A");
 2129   
 2130           /**
 2131            * Constant for the "Meetei Mayek" Unicode character block.
 2132            * @since 1.7
 2133            */
 2134           public static final UnicodeBlock MEETEI_MAYEK =
 2135               new UnicodeBlock("MEETEI_MAYEK",
 2136                                "MEETEI MAYEK",
 2137                                "MEETEIMAYEK");
 2138   
 2139           /**
 2140            * Constant for the "Hangul Jamo Extended-B" Unicode character block.
 2141            * @since 1.7
 2142            */
 2143           public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
 2144               new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
 2145                                "HANGUL JAMO EXTENDED-B",
 2146                                "HANGULJAMOEXTENDED-B");
 2147   
 2148           /**
 2149            * Constant for the "Vertical Forms" Unicode character block.
 2150            * @since 1.7
 2151            */
 2152           public static final UnicodeBlock VERTICAL_FORMS =
 2153               new UnicodeBlock("VERTICAL_FORMS",
 2154                                "VERTICAL FORMS",
 2155                                "VERTICALFORMS");
 2156   
 2157           /**
 2158            * Constant for the "Ancient Greek Numbers" Unicode character block.
 2159            * @since 1.7
 2160            */
 2161           public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
 2162               new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
 2163                                "ANCIENT GREEK NUMBERS",
 2164                                "ANCIENTGREEKNUMBERS");
 2165   
 2166           /**
 2167            * Constant for the "Ancient Symbols" Unicode character block.
 2168            * @since 1.7
 2169            */
 2170           public static final UnicodeBlock ANCIENT_SYMBOLS =
 2171               new UnicodeBlock("ANCIENT_SYMBOLS",
 2172                                "ANCIENT SYMBOLS",
 2173                                "ANCIENTSYMBOLS");
 2174   
 2175           /**
 2176            * Constant for the "Phaistos Disc" Unicode character block.
 2177            * @since 1.7
 2178            */
 2179           public static final UnicodeBlock PHAISTOS_DISC =
 2180               new UnicodeBlock("PHAISTOS_DISC",
 2181                                "PHAISTOS DISC",
 2182                                "PHAISTOSDISC");
 2183   
 2184           /**
 2185            * Constant for the "Lycian" Unicode character block.
 2186            * @since 1.7
 2187            */
 2188           public static final UnicodeBlock LYCIAN =
 2189               new UnicodeBlock("LYCIAN");
 2190   
 2191           /**
 2192            * Constant for the "Carian" Unicode character block.
 2193            * @since 1.7
 2194            */
 2195           public static final UnicodeBlock CARIAN =
 2196               new UnicodeBlock("CARIAN");
 2197   
 2198           /**
 2199            * Constant for the "Old Persian" Unicode character block.
 2200            * @since 1.7
 2201            */
 2202           public static final UnicodeBlock OLD_PERSIAN =
 2203               new UnicodeBlock("OLD_PERSIAN",
 2204                                "OLD PERSIAN",
 2205                                "OLDPERSIAN");
 2206   
 2207           /**
 2208            * Constant for the "Imperial Aramaic" Unicode character block.
 2209            * @since 1.7
 2210            */
 2211           public static final UnicodeBlock IMPERIAL_ARAMAIC =
 2212               new UnicodeBlock("IMPERIAL_ARAMAIC",
 2213                                "IMPERIAL ARAMAIC",
 2214                                "IMPERIALARAMAIC");
 2215   
 2216           /**
 2217            * Constant for the "Phoenician" Unicode character block.
 2218            * @since 1.7
 2219            */
 2220           public static final UnicodeBlock PHOENICIAN =
 2221               new UnicodeBlock("PHOENICIAN");
 2222   
 2223           /**
 2224            * Constant for the "Lydian" Unicode character block.
 2225            * @since 1.7
 2226            */
 2227           public static final UnicodeBlock LYDIAN =
 2228               new UnicodeBlock("LYDIAN");
 2229   
 2230           /**
 2231            * Constant for the "Kharoshthi" Unicode character block.
 2232            * @since 1.7
 2233            */
 2234           public static final UnicodeBlock KHAROSHTHI =
 2235               new UnicodeBlock("KHAROSHTHI");
 2236   
 2237           /**
 2238            * Constant for the "Old South Arabian" Unicode character block.
 2239            * @since 1.7
 2240            */
 2241           public static final UnicodeBlock OLD_SOUTH_ARABIAN =
 2242               new UnicodeBlock("OLD_SOUTH_ARABIAN",
 2243                                "OLD SOUTH ARABIAN",
 2244                                "OLDSOUTHARABIAN");
 2245   
 2246           /**
 2247            * Constant for the "Avestan" Unicode character block.
 2248            * @since 1.7
 2249            */
 2250           public static final UnicodeBlock AVESTAN =
 2251               new UnicodeBlock("AVESTAN");
 2252   
 2253           /**
 2254            * Constant for the "Inscriptional Parthian" Unicode character block.
 2255            * @since 1.7
 2256            */
 2257           public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
 2258               new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
 2259                                "INSCRIPTIONAL PARTHIAN",
 2260                                "INSCRIPTIONALPARTHIAN");
 2261   
 2262           /**
 2263            * Constant for the "Inscriptional Pahlavi" Unicode character block.
                 * Covers code points U+10B60..U+10B7F.
 2264            * @since 1.7
 2265            */
 2266           public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
 2267               new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
 2268                                "INSCRIPTIONAL PAHLAVI",
 2269                                "INSCRIPTIONALPAHLAVI");
 2270   
 2271           /**
 2272            * Constant for the "Old Turkic" Unicode character block.
                 * Covers code points U+10C00..U+10C4F.
 2273            * @since 1.7
 2274            */
 2275           public static final UnicodeBlock OLD_TURKIC =
 2276               new UnicodeBlock("OLD_TURKIC",
 2277                                "OLD TURKIC",
 2278                                "OLDTURKIC");
 2279   
 2280           /**
 2281            * Constant for the "Rumi Numeral Symbols" Unicode character block.
                 * Covers code points U+10E60..U+10E7F.
 2282            * @since 1.7
 2283            */
 2284           public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
 2285               new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
 2286                                "RUMI NUMERAL SYMBOLS",
 2287                                "RUMINUMERALSYMBOLS");
 2288   
 2289           /**
 2290            * Constant for the "Brahmi" Unicode character block.
                 * Covers code points U+11000..U+1107F.
 2291            * @since 1.7
 2292            */
 2293           public static final UnicodeBlock BRAHMI =
 2294               new UnicodeBlock("BRAHMI");
 2295   
 2296           /**
 2297            * Constant for the "Kaithi" Unicode character block.
                 * Covers code points U+11080..U+110CF.
 2298            * @since 1.7
 2299            */
 2300           public static final UnicodeBlock KAITHI =
 2301               new UnicodeBlock("KAITHI");
 2302   
 2303           /**
 2304            * Constant for the "Cuneiform" Unicode character block.
                 * Covers code points U+12000..U+123FF.
 2305            * @since 1.7
 2306            */
 2307           public static final UnicodeBlock CUNEIFORM =
 2308               new UnicodeBlock("CUNEIFORM");
 2309   
 2310           /**
 2311            * Constant for the "Cuneiform Numbers and Punctuation" Unicode
 2312            * character block.
                 * Covers code points U+12400..U+1247F.
 2313            * @since 1.7
 2314            */
 2315           public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
 2316               new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
 2317                                "CUNEIFORM NUMBERS AND PUNCTUATION",
 2318                                "CUNEIFORMNUMBERSANDPUNCTUATION");
 2319   
 2320           /**
 2321            * Constant for the "Egyptian Hieroglyphs" Unicode character block.
                 * Covers code points U+13000..U+1342F.
 2322            * @since 1.7
 2323            */
 2324           public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
 2325               new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
 2326                                "EGYPTIAN HIEROGLYPHS",
 2327                                "EGYPTIANHIEROGLYPHS");
 2328   
 2329           /**
 2330            * Constant for the "Bamum Supplement" Unicode character block.
                 * Covers code points U+16800..U+16A3F.
 2331            * @since 1.7
 2332            */
 2333           public static final UnicodeBlock BAMUM_SUPPLEMENT =
 2334               new UnicodeBlock("BAMUM_SUPPLEMENT",
 2335                                "BAMUM SUPPLEMENT",
 2336                                "BAMUMSUPPLEMENT");
 2337   
 2338           /**
 2339            * Constant for the "Kana Supplement" Unicode character block.
                 * Covers code points U+1B000..U+1B0FF.
 2340            * @since 1.7
 2341            */
 2342           public static final UnicodeBlock KANA_SUPPLEMENT =
 2343               new UnicodeBlock("KANA_SUPPLEMENT",
 2344                                "KANA SUPPLEMENT",
 2345                                "KANASUPPLEMENT");
 2346   
 2347           /**
 2348            * Constant for the "Ancient Greek Musical Notation" Unicode character
 2349            * block.
                 * Covers code points U+1D200..U+1D24F.
 2350            * @since 1.7
 2351            */
 2352           public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
 2353               new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
 2354                                "ANCIENT GREEK MUSICAL NOTATION",
 2355                                "ANCIENTGREEKMUSICALNOTATION");
 2356   
 2357           /**
 2358            * Constant for the "Counting Rod Numerals" Unicode character block.
                 * Covers code points U+1D360..U+1D37F.
 2359            * @since 1.7
 2360            */
 2361           public static final UnicodeBlock COUNTING_ROD_NUMERALS =
 2362               new UnicodeBlock("COUNTING_ROD_NUMERALS",
 2363                                "COUNTING ROD NUMERALS",
 2364                                "COUNTINGRODNUMERALS");
 2365   
 2366           /**
 2367            * Constant for the "Mahjong Tiles" Unicode character block.
                 * Covers code points U+1F000..U+1F02F.
 2368            * @since 1.7
 2369            */
 2370           public static final UnicodeBlock MAHJONG_TILES =
 2371               new UnicodeBlock("MAHJONG_TILES",
 2372                                "MAHJONG TILES",
 2373                                "MAHJONGTILES");
 2374   
 2375           /**
 2376            * Constant for the "Domino Tiles" Unicode character block.
                 * Covers code points U+1F030..U+1F09F.
 2377            * @since 1.7
 2378            */
 2379           public static final UnicodeBlock DOMINO_TILES =
 2380               new UnicodeBlock("DOMINO_TILES",
 2381                                "DOMINO TILES",
 2382                                "DOMINOTILES");
 2383   
 2384           /**
 2385            * Constant for the "Playing Cards" Unicode character block.
                 * Covers code points U+1F0A0..U+1F0FF.
 2386            * @since 1.7
 2387            */
 2388           public static final UnicodeBlock PLAYING_CARDS =
 2389               new UnicodeBlock("PLAYING_CARDS",
 2390                                "PLAYING CARDS",
 2391                                "PLAYINGCARDS");
 2392   
 2393           /**
 2394            * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
 2395            * block.
                 * Covers code points U+1F100..U+1F1FF.
 2396            * @since 1.7
 2397            */
 2398           public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
 2399               new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
 2400                                "ENCLOSED ALPHANUMERIC SUPPLEMENT",
 2401                                "ENCLOSEDALPHANUMERICSUPPLEMENT");
 2402   
 2403           /**
 2404            * Constant for the "Enclosed Ideographic Supplement" Unicode character
 2405            * block.
                 * Covers code points U+1F200..U+1F2FF.
 2406            * @since 1.7
 2407            */
 2408           public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
 2409               new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
 2410                                "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
 2411                                "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
 2412   
 2413           /**
 2414            * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
 2415            * character block.
                 * Covers code points U+1F300..U+1F5FF.
 2416            * @since 1.7
 2417            */
 2418           public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
 2419               new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
 2420                                "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
 2421                                "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
 2422   
 2423           /**
 2424            * Constant for the "Emoticons" Unicode character block.
                 * Covers code points U+1F600..U+1F64F.
 2425            * @since 1.7
 2426            */
 2427           public static final UnicodeBlock EMOTICONS =
 2428               new UnicodeBlock("EMOTICONS");
 2429   
 2430           /**
 2431            * Constant for the "Transport And Map Symbols" Unicode character block.
                 * Covers code points U+1F680..U+1F6FF.
 2432            * @since 1.7
 2433            */
 2434           public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
 2435               new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
 2436                                "TRANSPORT AND MAP SYMBOLS",
 2437                                "TRANSPORTANDMAPSYMBOLS");
 2438   
 2439           /**
 2440            * Constant for the "Alchemical Symbols" Unicode character block.
                 * Covers code points U+1F700..U+1F77F.
 2441            * @since 1.7
 2442            */
 2443           public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
 2444               new UnicodeBlock("ALCHEMICAL_SYMBOLS",
 2445                                "ALCHEMICAL SYMBOLS",
 2446                                "ALCHEMICALSYMBOLS");
 2447   
 2448           /**
 2449            * Constant for the "CJK Unified Ideographs Extension C" Unicode
 2450            * character block.
                 * Covers code points U+2A700..U+2B73F.
 2451            * @since 1.7
 2452            */
 2453           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
 2454               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
 2455                                "CJK UNIFIED IDEOGRAPHS EXTENSION C",
 2456                                "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
 2457   
 2458           /**
 2459            * Constant for the "CJK Unified Ideographs Extension D" Unicode
 2460            * character block.
                 * Covers code points U+2B740..U+2B81F.
 2461            * @since 1.7
 2462            */
 2463           public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
 2464               new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
 2465                                "CJK UNIFIED IDEOGRAPHS EXTENSION D",
 2466                                "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
 2467   
 2468           private static final int blockStarts[] = {
                    // Table of range boundaries covering U+0000..U+10FFFF.
                    // Each entry is the first code point of one contiguous range; the
                    // range runs up to (but not including) the next entry, and the
                    // last range runs to U+10FFFF. Entries are in strictly ascending
                    // order. Ranges labelled "unassigned" are gaps between defined
                    // blocks; they correspond to the null entries at the same index
                    // in blocks[].
                    // NOTE(review): the lookup code that reads this table is outside
                    // this view; presumably it depends on the ascending order and on
                    // index alignment with blocks[] -- confirm before editing, and
                    // always add or remove entries in both arrays together.
 2469               0x0000,   // 0000..007F; Basic Latin
 2470               0x0080,   // 0080..00FF; Latin-1 Supplement
 2471               0x0100,   // 0100..017F; Latin Extended-A
 2472               0x0180,   // 0180..024F; Latin Extended-B
 2473               0x0250,   // 0250..02AF; IPA Extensions
 2474               0x02B0,   // 02B0..02FF; Spacing Modifier Letters
 2475               0x0300,   // 0300..036F; Combining Diacritical Marks
 2476               0x0370,   // 0370..03FF; Greek and Coptic
 2477               0x0400,   // 0400..04FF; Cyrillic
 2478               0x0500,   // 0500..052F; Cyrillic Supplement
 2479               0x0530,   // 0530..058F; Armenian
 2480               0x0590,   // 0590..05FF; Hebrew
 2481               0x0600,   // 0600..06FF; Arabic
 2482               0x0700,   // 0700..074F; Syriac
 2483               0x0750,   // 0750..077F; Arabic Supplement
 2484               0x0780,   // 0780..07BF; Thaana
 2485               0x07C0,   // 07C0..07FF; NKo
 2486               0x0800,   // 0800..083F; Samaritan
 2487               0x0840,   // 0840..085F; Mandaic
 2488               0x0860,   //             unassigned
 2489               0x0900,   // 0900..097F; Devanagari
 2490               0x0980,   // 0980..09FF; Bengali
 2491               0x0A00,   // 0A00..0A7F; Gurmukhi
 2492               0x0A80,   // 0A80..0AFF; Gujarati
 2493               0x0B00,   // 0B00..0B7F; Oriya
 2494               0x0B80,   // 0B80..0BFF; Tamil
 2495               0x0C00,   // 0C00..0C7F; Telugu
 2496               0x0C80,   // 0C80..0CFF; Kannada
 2497               0x0D00,   // 0D00..0D7F; Malayalam
 2498               0x0D80,   // 0D80..0DFF; Sinhala
 2499               0x0E00,   // 0E00..0E7F; Thai
 2500               0x0E80,   // 0E80..0EFF; Lao
 2501               0x0F00,   // 0F00..0FFF; Tibetan
 2502               0x1000,   // 1000..109F; Myanmar
 2503               0x10A0,   // 10A0..10FF; Georgian
 2504               0x1100,   // 1100..11FF; Hangul Jamo
 2505               0x1200,   // 1200..137F; Ethiopic
 2506               0x1380,   // 1380..139F; Ethiopic Supplement
 2507               0x13A0,   // 13A0..13FF; Cherokee
 2508               0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
 2509               0x1680,   // 1680..169F; Ogham
 2510               0x16A0,   // 16A0..16FF; Runic
 2511               0x1700,   // 1700..171F; Tagalog
 2512               0x1720,   // 1720..173F; Hanunoo
 2513               0x1740,   // 1740..175F; Buhid
 2514               0x1760,   // 1760..177F; Tagbanwa
 2515               0x1780,   // 1780..17FF; Khmer
 2516               0x1800,   // 1800..18AF; Mongolian
 2517               0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
 2518               0x1900,   // 1900..194F; Limbu
 2519               0x1950,   // 1950..197F; Tai Le
 2520               0x1980,   // 1980..19DF; New Tai Lue
 2521               0x19E0,   // 19E0..19FF; Khmer Symbols
 2522               0x1A00,   // 1A00..1A1F; Buginese
 2523               0x1A20,   // 1A20..1AAF; Tai Tham
 2524               0x1AB0,   //             unassigned
 2525               0x1B00,   // 1B00..1B7F; Balinese
 2526               0x1B80,   // 1B80..1BBF; Sundanese
 2527               0x1BC0,   // 1BC0..1BFF; Batak
 2528               0x1C00,   // 1C00..1C4F; Lepcha
 2529               0x1C50,   // 1C50..1C7F; Ol Chiki
 2530               0x1C80,   //             unassigned
 2531               0x1CD0,   // 1CD0..1CFF; Vedic Extensions
 2532               0x1D00,   // 1D00..1D7F; Phonetic Extensions
 2533               0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
 2534               0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
 2535               0x1E00,   // 1E00..1EFF; Latin Extended Additional
 2536               0x1F00,   // 1F00..1FFF; Greek Extended
 2537               0x2000,   // 2000..206F; General Punctuation
 2538               0x2070,   // 2070..209F; Superscripts and Subscripts
 2539               0x20A0,   // 20A0..20CF; Currency Symbols
 2540               0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
 2541               0x2100,   // 2100..214F; Letterlike Symbols
 2542               0x2150,   // 2150..218F; Number Forms
 2543               0x2190,   // 2190..21FF; Arrows
 2544               0x2200,   // 2200..22FF; Mathematical Operators
 2545               0x2300,   // 2300..23FF; Miscellaneous Technical
 2546               0x2400,   // 2400..243F; Control Pictures
 2547               0x2440,   // 2440..245F; Optical Character Recognition
 2548               0x2460,   // 2460..24FF; Enclosed Alphanumerics
 2549               0x2500,   // 2500..257F; Box Drawing
 2550               0x2580,   // 2580..259F; Block Elements
 2551               0x25A0,   // 25A0..25FF; Geometric Shapes
 2552               0x2600,   // 2600..26FF; Miscellaneous Symbols
 2553               0x2700,   // 2700..27BF; Dingbats
 2554               0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
 2555               0x27F0,   // 27F0..27FF; Supplemental Arrows-A
 2556               0x2800,   // 2800..28FF; Braille Patterns
 2557               0x2900,   // 2900..297F; Supplemental Arrows-B
 2558               0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
 2559               0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
 2560               0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
 2561               0x2C00,   // 2C00..2C5F; Glagolitic
 2562               0x2C60,   // 2C60..2C7F; Latin Extended-C
 2563               0x2C80,   // 2C80..2CFF; Coptic
 2564               0x2D00,   // 2D00..2D2F; Georgian Supplement
 2565               0x2D30,   // 2D30..2D7F; Tifinagh
 2566               0x2D80,   // 2D80..2DDF; Ethiopic Extended
 2567               0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
 2568               0x2E00,   // 2E00..2E7F; Supplemental Punctuation
 2569               0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
 2570               0x2F00,   // 2F00..2FDF; Kangxi Radicals
 2571               0x2FE0,   //             unassigned
 2572               0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
 2573               0x3000,   // 3000..303F; CJK Symbols and Punctuation
 2574               0x3040,   // 3040..309F; Hiragana
 2575               0x30A0,   // 30A0..30FF; Katakana
 2576               0x3100,   // 3100..312F; Bopomofo
 2577               0x3130,   // 3130..318F; Hangul Compatibility Jamo
 2578               0x3190,   // 3190..319F; Kanbun
 2579               0x31A0,   // 31A0..31BF; Bopomofo Extended
 2580               0x31C0,   // 31C0..31EF; CJK Strokes
 2581               0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
 2582               0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
 2583               0x3300,   // 3300..33FF; CJK Compatibility
 2584               0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
 2585               0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
 2586               0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
 2587               0xA000,   // A000..A48F; Yi Syllables
 2588               0xA490,   // A490..A4CF; Yi Radicals
 2589               0xA4D0,   // A4D0..A4FF; Lisu
 2590               0xA500,   // A500..A63F; Vai
 2591               0xA640,   // A640..A69F; Cyrillic Extended-B
 2592               0xA6A0,   // A6A0..A6FF; Bamum
 2593               0xA700,   // A700..A71F; Modifier Tone Letters
 2594               0xA720,   // A720..A7FF; Latin Extended-D
 2595               0xA800,   // A800..A82F; Syloti Nagri
 2596               0xA830,   // A830..A83F; Common Indic Number Forms
 2597               0xA840,   // A840..A87F; Phags-pa
 2598               0xA880,   // A880..A8DF; Saurashtra
 2599               0xA8E0,   // A8E0..A8FF; Devanagari Extended
 2600               0xA900,   // A900..A92F; Kayah Li
 2601               0xA930,   // A930..A95F; Rejang
 2602               0xA960,   // A960..A97F; Hangul Jamo Extended-A
 2603               0xA980,   // A980..A9DF; Javanese
 2604               0xA9E0,   //             unassigned
 2605               0xAA00,   // AA00..AA5F; Cham
 2606               0xAA60,   // AA60..AA7F; Myanmar Extended-A
 2607               0xAA80,   // AA80..AADF; Tai Viet
 2608               0xAAE0,   //             unassigned
 2609               0xAB00,   // AB00..AB2F; Ethiopic Extended-A
 2610               0xAB30,   //             unassigned
 2611               0xABC0,   // ABC0..ABFF; Meetei Mayek
 2612               0xAC00,   // AC00..D7AF; Hangul Syllables
 2613               0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
 2614               0xD800,   // D800..DB7F; High Surrogates
 2615               0xDB80,   // DB80..DBFF; High Private Use Surrogates
 2616               0xDC00,   // DC00..DFFF; Low Surrogates
 2617               0xE000,   // E000..F8FF; Private Use Area
 2618               0xF900,   // F900..FAFF; CJK Compatibility Ideographs
 2619               0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
 2620               0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
 2621               0xFE00,   // FE00..FE0F; Variation Selectors
 2622               0xFE10,   // FE10..FE1F; Vertical Forms
 2623               0xFE20,   // FE20..FE2F; Combining Half Marks
 2624               0xFE30,   // FE30..FE4F; CJK Compatibility Forms
 2625               0xFE50,   // FE50..FE6F; Small Form Variants
 2626               0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
 2627               0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
 2628               0xFFF0,   // FFF0..FFFF; Specials
 2629               0x10000,  // 10000..1007F; Linear B Syllabary
 2630               0x10080,  // 10080..100FF; Linear B Ideograms
 2631               0x10100,  // 10100..1013F; Aegean Numbers
 2632               0x10140,  // 10140..1018F; Ancient Greek Numbers
 2633               0x10190,  // 10190..101CF; Ancient Symbols
 2634               0x101D0,  // 101D0..101FF; Phaistos Disc
 2635               0x10200,  //               unassigned
 2636               0x10280,  // 10280..1029F; Lycian
 2637               0x102A0,  // 102A0..102DF; Carian
 2638               0x102E0,  //               unassigned
 2639               0x10300,  // 10300..1032F; Old Italic
 2640               0x10330,  // 10330..1034F; Gothic
 2641               0x10350,  //               unassigned
 2642               0x10380,  // 10380..1039F; Ugaritic
 2643               0x103A0,  // 103A0..103DF; Old Persian
 2644               0x103E0,  //               unassigned
 2645               0x10400,  // 10400..1044F; Deseret
 2646               0x10450,  // 10450..1047F; Shavian
 2647               0x10480,  // 10480..104AF; Osmanya
 2648               0x104B0,  //               unassigned
 2649               0x10800,  // 10800..1083F; Cypriot Syllabary
 2650               0x10840,  // 10840..1085F; Imperial Aramaic
 2651               0x10860,  //               unassigned
 2652               0x10900,  // 10900..1091F; Phoenician
 2653               0x10920,  // 10920..1093F; Lydian
 2654               0x10940,  //               unassigned
 2655               0x10A00,  // 10A00..10A5F; Kharoshthi
 2656               0x10A60,  // 10A60..10A7F; Old South Arabian
 2657               0x10A80,  //               unassigned
 2658               0x10B00,  // 10B00..10B3F; Avestan
 2659               0x10B40,  // 10B40..10B5F; Inscriptional Parthian
 2660               0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
 2661               0x10B80,  //               unassigned
 2662               0x10C00,  // 10C00..10C4F; Old Turkic
 2663               0x10C50,  //               unassigned
 2664               0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
 2665               0x10E80,  //               unassigned
 2666               0x11000,  // 11000..1107F; Brahmi
 2667               0x11080,  // 11080..110CF; Kaithi
 2668               0x110D0,  //               unassigned
 2669               0x12000,  // 12000..123FF; Cuneiform
 2670               0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
 2671               0x12480,  //               unassigned
 2672               0x13000,  // 13000..1342F; Egyptian Hieroglyphs
 2673               0x13430,  //               unassigned
 2674               0x16800,  // 16800..16A3F; Bamum Supplement
 2675               0x16A40,  //               unassigned
 2676               0x1B000,  // 1B000..1B0FF; Kana Supplement
 2677               0x1B100,  //               unassigned
 2678               0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
 2679               0x1D100,  // 1D100..1D1FF; Musical Symbols
 2680               0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
 2681               0x1D250,  //               unassigned
 2682               0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
 2683               0x1D360,  // 1D360..1D37F; Counting Rod Numerals
 2684               0x1D380,  //               unassigned
 2685               0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
 2686               0x1D800,  //               unassigned
 2687               0x1F000,  // 1F000..1F02F; Mahjong Tiles
 2688               0x1F030,  // 1F030..1F09F; Domino Tiles
 2689               0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
 2690               0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
 2691               0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
 2692               0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
 2693               0x1F600,  // 1F600..1F64F; Emoticons
 2694               0x1F650,  //               unassigned
 2695               0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
 2696               0x1F700,  // 1F700..1F77F; Alchemical Symbols
 2697               0x1F780,  //               unassigned
 2698               0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
 2699               0x2A6E0,  //               unassigned
 2700               0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
 2701               0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
 2702               0x2B820,  //               unassigned
 2703               0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
 2704               0x2FA20,  //               unassigned
 2705               0xE0000,  // E0000..E007F; Tags
 2706               0xE0080,  //               unassigned
 2707               0xE0100,  // E0100..E01EF; Variation Selectors Supplement
 2708               0xE01F0,  //               unassigned
 2709               0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
 2710               0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
 2711           };
 2712   
 2713           private static final UnicodeBlock[] blocks = {
                    // Parallel to blockStarts[]: entry i is the block whose range
                    // begins at blockStarts[i]. A null entry marks a range that is
                    // labelled "unassigned" there, i.e. one that belongs to no block.
                    // Both arrays have the same number of entries and must be edited
                    // together so the indices stay aligned.
                    // A few constants carry names that differ from the range label
                    // in blockStarts[]: GREEK ("Greek and Coptic"),
                    // CYRILLIC_SUPPLEMENTARY ("Cyrillic Supplement") and
                    // COMBINING_MARKS_FOR_SYMBOLS ("Combining Diacritical Marks for
                    // Symbols").
 2714               BASIC_LATIN,
 2715               LATIN_1_SUPPLEMENT,
 2716               LATIN_EXTENDED_A,
 2717               LATIN_EXTENDED_B,
 2718               IPA_EXTENSIONS,
 2719               SPACING_MODIFIER_LETTERS,
 2720               COMBINING_DIACRITICAL_MARKS,
 2721               GREEK,
 2722               CYRILLIC,
 2723               CYRILLIC_SUPPLEMENTARY,
 2724               ARMENIAN,
 2725               HEBREW,
 2726               ARABIC,
 2727               SYRIAC,
 2728               ARABIC_SUPPLEMENT,
 2729               THAANA,
 2730               NKO,
 2731               SAMARITAN,
 2732               MANDAIC,
 2733               null,
 2734               DEVANAGARI,
 2735               BENGALI,
 2736               GURMUKHI,
 2737               GUJARATI,
 2738               ORIYA,
 2739               TAMIL,
 2740               TELUGU,
 2741               KANNADA,
 2742               MALAYALAM,
 2743               SINHALA,
 2744               THAI,
 2745               LAO,
 2746               TIBETAN,
 2747               MYANMAR,
 2748               GEORGIAN,
 2749               HANGUL_JAMO,
 2750               ETHIOPIC,
 2751               ETHIOPIC_SUPPLEMENT,
 2752               CHEROKEE,
 2753               UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
 2754               OGHAM,
 2755               RUNIC,
 2756               TAGALOG,
 2757               HANUNOO,
 2758               BUHID,
 2759               TAGBANWA,
 2760               KHMER,
 2761               MONGOLIAN,
 2762               UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
 2763               LIMBU,
 2764               TAI_LE,
 2765               NEW_TAI_LUE,
 2766               KHMER_SYMBOLS,
 2767               BUGINESE,
 2768               TAI_THAM,
 2769               null,
 2770               BALINESE,
 2771               SUNDANESE,
 2772               BATAK,
 2773               LEPCHA,
 2774               OL_CHIKI,
 2775               null,
 2776               VEDIC_EXTENSIONS,
 2777               PHONETIC_EXTENSIONS,
 2778               PHONETIC_EXTENSIONS_SUPPLEMENT,
 2779               COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
 2780               LATIN_EXTENDED_ADDITIONAL,
 2781               GREEK_EXTENDED,
 2782               GENERAL_PUNCTUATION,
 2783               SUPERSCRIPTS_AND_SUBSCRIPTS,
 2784               CURRENCY_SYMBOLS,
 2785               COMBINING_MARKS_FOR_SYMBOLS,
 2786               LETTERLIKE_SYMBOLS,
 2787               NUMBER_FORMS,
 2788               ARROWS,
 2789               MATHEMATICAL_OPERATORS,
 2790               MISCELLANEOUS_TECHNICAL,
 2791               CONTROL_PICTURES,
 2792               OPTICAL_CHARACTER_RECOGNITION,
 2793               ENCLOSED_ALPHANUMERICS,
 2794               BOX_DRAWING,
 2795               BLOCK_ELEMENTS,
 2796               GEOMETRIC_SHAPES,
 2797               MISCELLANEOUS_SYMBOLS,
 2798               DINGBATS,
 2799               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
 2800               SUPPLEMENTAL_ARROWS_A,
 2801               BRAILLE_PATTERNS,
 2802               SUPPLEMENTAL_ARROWS_B,
 2803               MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
 2804               SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
 2805               MISCELLANEOUS_SYMBOLS_AND_ARROWS,
 2806               GLAGOLITIC,
 2807               LATIN_EXTENDED_C,
 2808               COPTIC,
 2809               GEORGIAN_SUPPLEMENT,
 2810               TIFINAGH,
 2811               ETHIOPIC_EXTENDED,
 2812               CYRILLIC_EXTENDED_A,
 2813               SUPPLEMENTAL_PUNCTUATION,
 2814               CJK_RADICALS_SUPPLEMENT,
 2815               KANGXI_RADICALS,
 2816               null,
 2817               IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
 2818               CJK_SYMBOLS_AND_PUNCTUATION,
 2819               HIRAGANA,
 2820               KATAKANA,
 2821               BOPOMOFO,
 2822               HANGUL_COMPATIBILITY_JAMO,
 2823               KANBUN,
 2824               BOPOMOFO_EXTENDED,
 2825               CJK_STROKES,
 2826               KATAKANA_PHONETIC_EXTENSIONS,
 2827               ENCLOSED_CJK_LETTERS_AND_MONTHS,
 2828               CJK_COMPATIBILITY,
 2829               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
 2830               YIJING_HEXAGRAM_SYMBOLS,
 2831               CJK_UNIFIED_IDEOGRAPHS,
 2832               YI_SYLLABLES,
 2833               YI_RADICALS,
 2834               LISU,
 2835               VAI,
 2836               CYRILLIC_EXTENDED_B,
 2837               BAMUM,
 2838               MODIFIER_TONE_LETTERS,
 2839               LATIN_EXTENDED_D,
 2840               SYLOTI_NAGRI,
 2841               COMMON_INDIC_NUMBER_FORMS,
 2842               PHAGS_PA,
 2843               SAURASHTRA,
 2844               DEVANAGARI_EXTENDED,
 2845               KAYAH_LI,
 2846               REJANG,
 2847               HANGUL_JAMO_EXTENDED_A,
 2848               JAVANESE,
 2849               null,
 2850               CHAM,
 2851               MYANMAR_EXTENDED_A,
 2852               TAI_VIET,
 2853               null,
 2854               ETHIOPIC_EXTENDED_A,
 2855               null,
 2856               MEETEI_MAYEK,
 2857               HANGUL_SYLLABLES,
 2858               HANGUL_JAMO_EXTENDED_B,
 2859               HIGH_SURROGATES,
 2860               HIGH_PRIVATE_USE_SURROGATES,
 2861               LOW_SURROGATES,
 2862               PRIVATE_USE_AREA,
 2863               CJK_COMPATIBILITY_IDEOGRAPHS,
 2864               ALPHABETIC_PRESENTATION_FORMS,
 2865               ARABIC_PRESENTATION_FORMS_A,
 2866               VARIATION_SELECTORS,
 2867               VERTICAL_FORMS,
 2868               COMBINING_HALF_MARKS,
 2869               CJK_COMPATIBILITY_FORMS,
 2870               SMALL_FORM_VARIANTS,
 2871               ARABIC_PRESENTATION_FORMS_B,
 2872               HALFWIDTH_AND_FULLWIDTH_FORMS,
 2873               SPECIALS,
 2874               LINEAR_B_SYLLABARY,
 2875               LINEAR_B_IDEOGRAMS,
 2876               AEGEAN_NUMBERS,
 2877               ANCIENT_GREEK_NUMBERS,
 2878               ANCIENT_SYMBOLS,
 2879               PHAISTOS_DISC,
 2880               null,
 2881               LYCIAN,
 2882               CARIAN,
 2883               null,
 2884               OLD_ITALIC,
 2885               GOTHIC,
 2886               null,
 2887               UGARITIC,
 2888               OLD_PERSIAN,
 2889               null,
 2890               DESERET,
 2891               SHAVIAN,
 2892               OSMANYA,
 2893               null,
 2894               CYPRIOT_SYLLABARY,
 2895               IMPERIAL_ARAMAIC,
 2896               null,
 2897               PHOENICIAN,
 2898               LYDIAN,
 2899               null,
 2900               KHAROSHTHI,
 2901               OLD_SOUTH_ARABIAN,
 2902               null,
 2903               AVESTAN,
 2904               INSCRIPTIONAL_PARTHIAN,
 2905               INSCRIPTIONAL_PAHLAVI,
 2906               null,
 2907               OLD_TURKIC,
 2908               null,
 2909               RUMI_NUMERAL_SYMBOLS,
 2910               null,
 2911               BRAHMI,
 2912               KAITHI,
 2913               null,
 2914               CUNEIFORM,
 2915               CUNEIFORM_NUMBERS_AND_PUNCTUATION,
 2916               null,
 2917               EGYPTIAN_HIEROGLYPHS,
 2918               null,
 2919               BAMUM_SUPPLEMENT,
 2920               null,
 2921               KANA_SUPPLEMENT,
 2922               null,
 2923               BYZANTINE_MUSICAL_SYMBOLS,
 2924               MUSICAL_SYMBOLS,
 2925               ANCIENT_GREEK_MUSICAL_NOTATION,
 2926               null,
 2927               TAI_XUAN_JING_SYMBOLS,
 2928               COUNTING_ROD_NUMERALS,
 2929               null,
 2930               MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
 2931               null,
 2932               MAHJONG_TILES,
 2933               DOMINO_TILES,
 2934               PLAYING_CARDS,
 2935               ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
 2936               ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
 2937               MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
 2938               EMOTICONS,
 2939               null,
 2940               TRANSPORT_AND_MAP_SYMBOLS,
 2941               ALCHEMICAL_SYMBOLS,
 2942               null,
 2943               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
 2944               null,
 2945               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
 2946               CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
 2947               null,
 2948               CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
 2949               null,
 2950               TAGS,
 2951               null,
 2952               VARIATION_SELECTORS_SUPPLEMENT,
 2953               null,
 2954               SUPPLEMENTARY_PRIVATE_USE_AREA_A,
 2955               SUPPLEMENTARY_PRIVATE_USE_AREA_B
 2956           };
 2957   
 2958   
 2959           /**
 2960            * Returns the object representing the Unicode block containing the
 2961            * given character, or {@code null} if the character is not a
 2962            * member of a defined block.
 2963            *
 2964            * <p><b>Note:</b> This method cannot handle
 2965            * <a href="Character.html#supplementary"> supplementary
 2966            * characters</a>.  To support all Unicode characters, including
 2967            * supplementary characters, use the {@link #of(int)} method.
 2968            *
 2969            * @param   c  The character in question
 2970            * @return  The {@code UnicodeBlock} instance representing the
 2971            *          Unicode block of which this character is a member, or
 2972            *          {@code null} if the character is not a member of any
 2973            *          Unicode block
 2974            */
 2975           public static UnicodeBlock of(char c) {
 2976               return of((int) c);  // explicit widening selects the of(int) overload, not this method
 2977           }
 2978   
 2979           /**
 2980            * Returns the object representing the Unicode block
 2981            * containing the given character (Unicode code point), or
 2982            * {@code null} if the character is not a member of a
 2983            * defined block.
 2984            *
 2985            * @param   codePoint the character (Unicode code point) in question.
 2986            * @return  The {@code UnicodeBlock} instance representing the
 2987            *          Unicode block of which this character is a member, or
 2988            *          {@code null} if the character is not a member of any
 2989            *          Unicode block
 2990            * @exception IllegalArgumentException if the specified
 2991            * {@code codePoint} is an invalid Unicode code point.
 2992            * @see Character#isValidCodePoint(int)
 2993            * @since   1.5
 2994            */
 2995           public static UnicodeBlock of(int codePoint) {
 2996               if (!isValidCodePoint(codePoint)) {
 2997                   throw new IllegalArgumentException();
 2998               }
 2999   
 3000               int top, bottom, current;
 3001               bottom = 0;
 3002               top = blockStarts.length;
 3003               current = top/2;
 3004   
 3005               // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
 3006               while (top - bottom > 1) {
 3007                   if (codePoint >= blockStarts[current]) {
 3008                       bottom = current;
 3009                   } else {
 3010                       top = current;
 3011                   }
 3012                   current = (top + bottom) / 2;
 3013               }
 3014               return blocks[current];
 3015           }
 3016   
 3017           /**
 3018            * Returns the UnicodeBlock with the given name. Block
 3019            * names are determined by The Unicode Standard. The file
 3020            * Blocks-&lt;version&gt;.txt defines blocks for a particular
 3021            * version of the standard. The {@link Character} class specifies
 3022            * the version of the standard that it supports.
 3023            * <p>
 3024            * This method accepts block names in the following forms:
 3025            * <ol>
 3026            * <li> Canonical block names as defined by the Unicode Standard.
 3027            * For example, the standard defines a "Basic Latin" block. Therefore, this
 3028            * method accepts "Basic Latin" as a valid block name. The documentation of
 3029            * each UnicodeBlock provides the canonical name.
 3030            * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
 3031            * is a valid block name for the "Basic Latin" block.
 3032            * <li>The text representation of each constant UnicodeBlock identifier.
 3033            * For example, this method will return the {@link #BASIC_LATIN} block if
 3034            * provided with the "BASIC_LATIN" name. This form replaces all spaces and
 3035            * hyphens in the canonical name with underscores.
 3036            * </ol>
 3037            * Finally, character case is ignored for all of the valid block name forms.
 3038            * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
 3039            * The en_US locale's case mapping rules are used to provide case-insensitive
 3040            * string comparisons for block name validation.
 3041            * <p>
 3042            * If the Unicode Standard changes block names, both the previous and
 3043            * current names will be accepted.
 3044            *
 3045            * @param blockName A {@code UnicodeBlock} name.
 3046            * @return The {@code UnicodeBlock} instance identified
 3047            *         by {@code blockName}
 3048            * @throws IllegalArgumentException if {@code blockName} is an
 3049            *         invalid name
 3050            * @throws NullPointerException if {@code blockName} is null
 3051            * @since 1.5
 3052            */
 3053           public static final UnicodeBlock forName(String blockName) {
 3054               UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); // NPE here if blockName is null
 3055               if (block == null) {
 3056                   throw new IllegalArgumentException("Not a valid block name: " + blockName);
 3057               }
 3058               return block;
 3059           }
 3060       }
 3061   
 3062   
 3063       /**
 3064        * A family of character subsets representing the character scripts
 3065        * defined in the <a href="http://www.unicode.org/reports/tr24/">
 3066        * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
 3067        * character is assigned to a single Unicode script, either a specific
 3068        * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
 3069        * one of the following three special values,
 3070        * {@link Character.UnicodeScript#INHERITED Inherited},
 3071        * {@link Character.UnicodeScript#COMMON Common} or
 3072        * {@link Character.UnicodeScript#UNKNOWN Unknown}.
 3073        *
 3074        * @since 1.7
 3075        */
 3076       public static enum UnicodeScript {
 3077           /**
 3078            * Unicode script "Common".
 3079            */
 3080           COMMON,
 3081   
 3082           /**
 3083            * Unicode script "Latin".
 3084            */
 3085           LATIN,
 3086   
 3087           /**
 3088            * Unicode script "Greek".
 3089            */
 3090           GREEK,
 3091   
 3092           /**
 3093            * Unicode script "Cyrillic".
 3094            */
 3095           CYRILLIC,
 3096   
 3097           /**
 3098            * Unicode script "Armenian".
 3099            */
 3100           ARMENIAN,
 3101   
 3102           /**
 3103            * Unicode script "Hebrew".
 3104            */
 3105           HEBREW,
 3106   
 3107           /**
 3108            * Unicode script "Arabic".
 3109            */
 3110           ARABIC,
 3111   
 3112           /**
 3113            * Unicode script "Syriac".
 3114            */
 3115           SYRIAC,
 3116   
 3117           /**
 3118            * Unicode script "Thaana".
 3119            */
 3120           THAANA,
 3121   
 3122           /**
 3123            * Unicode script "Devanagari".
 3124            */
 3125           DEVANAGARI,
 3126   
 3127           /**
 3128            * Unicode script "Bengali".
 3129            */
 3130           BENGALI,
 3131   
 3132           /**
 3133            * Unicode script "Gurmukhi".
 3134            */
 3135           GURMUKHI,
 3136   
 3137           /**
 3138            * Unicode script "Gujarati".
 3139            */
 3140           GUJARATI,
 3141   
 3142           /**
 3143            * Unicode script "Oriya".
 3144            */
 3145           ORIYA,
 3146   
 3147           /**
 3148            * Unicode script "Tamil".
 3149            */
 3150           TAMIL,
 3151   
 3152           /**
 3153            * Unicode script "Telugu".
 3154            */
 3155           TELUGU,
 3156   
 3157           /**
 3158            * Unicode script "Kannada".
 3159            */
 3160           KANNADA,
 3161   
 3162           /**
 3163            * Unicode script "Malayalam".
 3164            */
 3165           MALAYALAM,
 3166   
 3167           /**
 3168            * Unicode script "Sinhala".
 3169            */
 3170           SINHALA,
 3171   
 3172           /**
 3173            * Unicode script "Thai".
 3174            */
 3175           THAI,
 3176   
 3177           /**
 3178            * Unicode script "Lao".
 3179            */
 3180           LAO,
 3181   
 3182           /**
 3183            * Unicode script "Tibetan".
 3184            */
 3185           TIBETAN,
 3186   
 3187           /**
 3188            * Unicode script "Myanmar".
 3189            */
 3190           MYANMAR,
 3191   
 3192           /**
 3193            * Unicode script "Georgian".
 3194            */
 3195           GEORGIAN,
 3196   
 3197           /**
 3198            * Unicode script "Hangul".
 3199            */
 3200           HANGUL,
 3201   
 3202           /**
 3203            * Unicode script "Ethiopic".
 3204            */
 3205           ETHIOPIC,
 3206   
 3207           /**
 3208            * Unicode script "Cherokee".
 3209            */
 3210           CHEROKEE,
 3211   
 3212           /**
 3213            * Unicode script "Canadian_Aboriginal".
 3214            */
 3215           CANADIAN_ABORIGINAL,
 3216   
 3217           /**
 3218            * Unicode script "Ogham".
 3219            */
 3220           OGHAM,
 3221   
 3222           /**
 3223            * Unicode script "Runic".
 3224            */
 3225           RUNIC,
 3226   
 3227           /**
 3228            * Unicode script "Khmer".
 3229            */
 3230           KHMER,
 3231   
 3232           /**
 3233            * Unicode script "Mongolian".
 3234            */
 3235           MONGOLIAN,
 3236   
 3237           /**
 3238            * Unicode script "Hiragana".
 3239            */
 3240           HIRAGANA,
 3241   
 3242           /**
 3243            * Unicode script "Katakana".
 3244            */
 3245           KATAKANA,
 3246   
 3247           /**
 3248            * Unicode script "Bopomofo".
 3249            */
 3250           BOPOMOFO,
 3251   
 3252           /**
 3253            * Unicode script "Han".
 3254            */
 3255           HAN,
 3256   
 3257           /**
 3258            * Unicode script "Yi".
 3259            */
 3260           YI,
 3261   
 3262           /**
 3263            * Unicode script "Old_Italic".
 3264            */
 3265           OLD_ITALIC,
 3266   
 3267           /**
 3268            * Unicode script "Gothic".
 3269            */
 3270           GOTHIC,
 3271   
 3272           /**
 3273            * Unicode script "Deseret".
 3274            */
 3275           DESERET,
 3276   
 3277           /**
 3278            * Unicode script "Inherited".
 3279            */
 3280           INHERITED,
 3281   
 3282           /**
 3283            * Unicode script "Tagalog".
 3284            */
 3285           TAGALOG,
 3286   
 3287           /**
 3288            * Unicode script "Hanunoo".
 3289            */
 3290           HANUNOO,
 3291   
 3292           /**
 3293            * Unicode script "Buhid".
 3294            */
 3295           BUHID,
 3296   
 3297           /**
 3298            * Unicode script "Tagbanwa".
 3299            */
 3300           TAGBANWA,
 3301   
 3302           /**
 3303            * Unicode script "Limbu".
 3304            */
 3305           LIMBU,
 3306   
 3307           /**
 3308            * Unicode script "Tai_Le".
 3309            */
 3310           TAI_LE,
 3311   
 3312           /**
 3313            * Unicode script "Linear_B".
 3314            */
 3315           LINEAR_B,
 3316   
 3317           /**
 3318            * Unicode script "Ugaritic".
 3319            */
 3320           UGARITIC,
 3321   
 3322           /**
 3323            * Unicode script "Shavian".
 3324            */
 3325           SHAVIAN,
 3326   
 3327           /**
 3328            * Unicode script "Osmanya".
 3329            */
 3330           OSMANYA,
 3331   
 3332           /**
 3333            * Unicode script "Cypriot".
 3334            */
 3335           CYPRIOT,
 3336   
 3337           /**
 3338            * Unicode script "Braille".
 3339            */
 3340           BRAILLE,
 3341   
 3342           /**
 3343            * Unicode script "Buginese".
 3344            */
 3345           BUGINESE,
 3346   
 3347           /**
 3348            * Unicode script "Coptic".
 3349            */
 3350           COPTIC,
 3351   
 3352           /**
 3353            * Unicode script "New_Tai_Lue".
 3354            */
 3355           NEW_TAI_LUE,
 3356   
 3357           /**
 3358            * Unicode script "Glagolitic".
 3359            */
 3360           GLAGOLITIC,
 3361   
 3362           /**
 3363            * Unicode script "Tifinagh".
 3364            */
 3365           TIFINAGH,
 3366   
 3367           /**
 3368            * Unicode script "Syloti_Nagri".
 3369            */
 3370           SYLOTI_NAGRI,
 3371   
 3372           /**
 3373            * Unicode script "Old_Persian".
 3374            */
 3375           OLD_PERSIAN,
 3376   
 3377           /**
 3378            * Unicode script "Kharoshthi".
 3379            */
 3380           KHAROSHTHI,
 3381   
 3382           /**
 3383            * Unicode script "Balinese".
 3384            */
 3385           BALINESE,
 3386   
 3387           /**
 3388            * Unicode script "Cuneiform".
 3389            */
 3390           CUNEIFORM,
 3391   
 3392           /**
 3393            * Unicode script "Phoenician".
 3394            */
 3395           PHOENICIAN,
 3396   
 3397           /**
 3398            * Unicode script "Phags_Pa".
 3399            */
 3400           PHAGS_PA,
 3401   
 3402           /**
 3403            * Unicode script "Nko".
 3404            */
 3405           NKO,
 3406   
 3407           /**
 3408            * Unicode script "Sundanese".
 3409            */
 3410           SUNDANESE,
 3411   
 3412           /**
 3413            * Unicode script "Batak".
 3414            */
 3415           BATAK,
 3416   
 3417           /**
 3418            * Unicode script "Lepcha".
 3419            */
 3420           LEPCHA,
 3421   
 3422           /**
 3423            * Unicode script "Ol_Chiki".
 3424            */
 3425           OL_CHIKI,
 3426   
 3427           /**
 3428            * Unicode script "Vai".
 3429            */
 3430           VAI,
 3431   
 3432           /**
 3433            * Unicode script "Saurashtra".
 3434            */
 3435           SAURASHTRA,
 3436   
 3437           /**
 3438            * Unicode script "Kayah_Li".
 3439            */
 3440           KAYAH_LI,
 3441   
 3442           /**
 3443            * Unicode script "Rejang".
 3444            */
 3445           REJANG,
 3446   
 3447           /**
 3448            * Unicode script "Lycian".
 3449            */
 3450           LYCIAN,
 3451   
 3452           /**
 3453            * Unicode script "Carian".
 3454            */
 3455           CARIAN,
 3456   
 3457           /**
 3458            * Unicode script "Lydian".
 3459            */
 3460           LYDIAN,
 3461   
 3462           /**
 3463            * Unicode script "Cham".
 3464            */
 3465           CHAM,
 3466   
 3467           /**
 3468            * Unicode script "Tai_Tham".
 3469            */
 3470           TAI_THAM,
 3471   
 3472           /**
 3473            * Unicode script "Tai_Viet".
 3474            */
 3475           TAI_VIET,
 3476   
 3477           /**
 3478            * Unicode script "Avestan".
 3479            */
 3480           AVESTAN,
 3481   
 3482           /**
 3483            * Unicode script "Egyptian_Hieroglyphs".
 3484            */
 3485           EGYPTIAN_HIEROGLYPHS,
 3486   
 3487           /**
 3488            * Unicode script "Samaritan".
 3489            */
 3490           SAMARITAN,
 3491   
 3492           /**
 3493            * Unicode script "Mandaic".
 3494            */
 3495           MANDAIC,
 3496   
 3497           /**
 3498            * Unicode script "Lisu".
 3499            */
 3500           LISU,
 3501   
 3502           /**
 3503            * Unicode script "Bamum".
 3504            */
 3505           BAMUM,
 3506   
 3507           /**
 3508            * Unicode script "Javanese".
 3509            */
 3510           JAVANESE,
 3511   
 3512           /**
 3513            * Unicode script "Meetei_Mayek".
 3514            */
 3515           MEETEI_MAYEK,
 3516   
 3517           /**
 3518            * Unicode script "Imperial_Aramaic".
 3519            */
 3520           IMPERIAL_ARAMAIC,
 3521   
 3522           /**
 3523            * Unicode script "Old_South_Arabian".
 3524            */
 3525           OLD_SOUTH_ARABIAN,
 3526   
 3527           /**
 3528            * Unicode script "Inscriptional_Parthian".
 3529            */
 3530           INSCRIPTIONAL_PARTHIAN,
 3531   
 3532           /**
 3533            * Unicode script "Inscriptional_Pahlavi".
 3534            */
 3535           INSCRIPTIONAL_PAHLAVI,
 3536   
 3537           /**
 3538            * Unicode script "Old_Turkic".
 3539            */
 3540           OLD_TURKIC,
 3541   
 3542           /**
 3543            * Unicode script "Brahmi".
 3544            */
 3545           BRAHMI,
 3546   
 3547           /**
 3548            * Unicode script "Kaithi".
 3549            */
 3550           KAITHI,
 3551   
 3552           /**
 3553            * Unicode script "Unknown".
 3554            */
 3555           UNKNOWN;
 3556   
 3557           private static final int[] scriptStarts = {
 3558               0x0000,   // 0000..0040; COMMON
 3559               0x0041,   // 0041..005A; LATIN
 3560               0x005B,   // 005B..0060; COMMON
 3561               0x0061,   // 0061..007A; LATIN
 3562               0x007B,   // 007B..00A9; COMMON
 3563               0x00AA,   // 00AA..00AA; LATIN
 3564               0x00AB,   // 00AB..00B9; COMMON
 3565               0x00BA,   // 00BA..00BA; LATIN
 3566               0x00BB,   // 00BB..00BF; COMMON
 3567               0x00C0,   // 00C0..00D6; LATIN
 3568               0x00D7,   // 00D7..00D7; COMMON
 3569               0x00D8,   // 00D8..00F6; LATIN
 3570               0x00F7,   // 00F7..00F7; COMMON
 3571               0x00F8,   // 00F8..02B8; LATIN
 3572               0x02B9,   // 02B9..02DF; COMMON
 3573               0x02E0,   // 02E0..02E4; LATIN
 3574               0x02E5,   // 02E5..02E9; COMMON
 3575               0x02EA,   // 02EA..02EB; BOPOMOFO
 3576               0x02EC,   // 02EC..02FF; COMMON
 3577               0x0300,   // 0300..036F; INHERITED
 3578               0x0370,   // 0370..0373; GREEK
 3579               0x0374,   // 0374..0374; COMMON
 3580               0x0375,   // 0375..037D; GREEK
 3581               0x037E,   // 037E..0383; COMMON
 3582               0x0384,   // 0384..0384; GREEK
 3583               0x0385,   // 0385..0385; COMMON
 3584               0x0386,   // 0386..0386; GREEK
 3585               0x0387,   // 0387..0387; COMMON
 3586               0x0388,   // 0388..03E1; GREEK
 3587               0x03E2,   // 03E2..03EF; COPTIC
 3588               0x03F0,   // 03F0..03FF; GREEK
 3589               0x0400,   // 0400..0484; CYRILLIC
 3590               0x0485,   // 0485..0486; INHERITED
 3591               0x0487,   // 0487..0530; CYRILLIC
 3592               0x0531,   // 0531..0588; ARMENIAN
 3593               0x0589,   // 0589..0589; COMMON
 3594               0x058A,   // 058A..0590; ARMENIAN
 3595               0x0591,   // 0591..05FF; HEBREW
 3596               0x0600,   // 0600..060B; ARABIC
 3597               0x060C,   // 060C..060C; COMMON
 3598               0x060D,   // 060D..061A; ARABIC
 3599               0x061B,   // 061B..061D; COMMON
 3600               0x061E,   // 061E..061E; ARABIC
 3601               0x061F,   // 061F..061F; COMMON
 3602               0x0620,   // 0620..063F; ARABIC
 3603               0x0640,   // 0640..0640; COMMON
 3604               0x0641,   // 0641..064A; ARABIC
 3605               0x064B,   // 064B..0655; INHERITED
 3606               0x0656,   // 0656..065E; ARABIC
 3607               0x065F,   // 065F..065F; INHERITED
 3608               0x0660,   // 0660..0669; COMMON
 3609               0x066A,   // 066A..066F; ARABIC
 3610               0x0670,   // 0670..0670; INHERITED
 3611               0x0671,   // 0671..06DC; ARABIC
 3612               0x06DD,   // 06DD..06DD; COMMON
 3613               0x06DE,   // 06DE..06FF; ARABIC
 3614               0x0700,   // 0700..074F; SYRIAC
 3615               0x0750,   // 0750..077F; ARABIC
 3616               0x0780,   // 0780..07BF; THAANA
 3617               0x07C0,   // 07C0..07FF; NKO
 3618               0x0800,   // 0800..083F; SAMARITAN
 3619               0x0840,   // 0840..08FF; MANDAIC
 3620               0x0900,   // 0900..0950; DEVANAGARI
 3621               0x0951,   // 0951..0952; INHERITED
 3622               0x0953,   // 0953..0963; DEVANAGARI
 3623               0x0964,   // 0964..0965; COMMON
 3624               0x0966,   // 0966..096F; DEVANAGARI
 3625               0x0970,   // 0970..0970; COMMON
 3626               0x0971,   // 0971..0980; DEVANAGARI
 3627               0x0981,   // 0981..0A00; BENGALI
 3628               0x0A01,   // 0A01..0A80; GURMUKHI
 3629               0x0A81,   // 0A81..0B00; GUJARATI
 3630               0x0B01,   // 0B01..0B81; ORIYA
 3631               0x0B82,   // 0B82..0C00; TAMIL
 3632               0x0C01,   // 0C01..0C81; TELUGU
 3633               0x0C82,   // 0C82..0D01; KANNADA
 3634               0x0D02,   // 0D02..0D81; MALAYALAM
 3635               0x0D82,   // 0D82..0E00; SINHALA
 3636               0x0E01,   // 0E01..0E3E; THAI
 3637               0x0E3F,   // 0E3F..0E3F; COMMON
 3638               0x0E40,   // 0E40..0E80; THAI
 3639               0x0E81,   // 0E81..0EFF; LAO
 3640               0x0F00,   // 0F00..0FD4; TIBETAN
 3641               0x0FD5,   // 0FD5..0FD8; COMMON
 3642               0x0FD9,   // 0FD9..0FFF; TIBETAN
 3643               0x1000,   // 1000..109F; MYANMAR
 3644               0x10A0,   // 10A0..10FA; GEORGIAN
 3645               0x10FB,   // 10FB..10FB; COMMON
 3646               0x10FC,   // 10FC..10FF; GEORGIAN
 3647               0x1100,   // 1100..11FF; HANGUL
 3648               0x1200,   // 1200..139F; ETHIOPIC
 3649               0x13A0,   // 13A0..13FF; CHEROKEE
 3650               0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
 3651               0x1680,   // 1680..169F; OGHAM
 3652               0x16A0,   // 16A0..16EA; RUNIC
 3653               0x16EB,   // 16EB..16ED; COMMON
 3654               0x16EE,   // 16EE..16FF; RUNIC
 3655               0x1700,   // 1700..171F; TAGALOG
 3656               0x1720,   // 1720..1734; HANUNOO
 3657               0x1735,   // 1735..173F; COMMON
 3658               0x1740,   // 1740..175F; BUHID
 3659               0x1760,   // 1760..177F; TAGBANWA
 3660               0x1780,   // 1780..17FF; KHMER
 3661               0x1800,   // 1800..1801; MONGOLIAN
 3662               0x1802,   // 1802..1803; COMMON
 3663               0x1804,   // 1804..1804; MONGOLIAN
 3664               0x1805,   // 1805..1805; COMMON
 3665               0x1806,   // 1806..18AF; MONGOLIAN
 3666               0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
 3667               0x1900,   // 1900..194F; LIMBU
 3668               0x1950,   // 1950..197F; TAI_LE
 3669               0x1980,   // 1980..19DF; NEW_TAI_LUE
 3670               0x19E0,   // 19E0..19FF; KHMER
 3671               0x1A00,   // 1A00..1A1F; BUGINESE
 3672               0x1A20,   // 1A20..1AFF; TAI_THAM
 3673               0x1B00,   // 1B00..1B7F; BALINESE
 3674               0x1B80,   // 1B80..1BBF; SUNDANESE
 3675               0x1BC0,   // 1BC0..1BFF; BATAK
 3676               0x1C00,   // 1C00..1C4F; LEPCHA
 3677               0x1C50,   // 1C50..1CCF; OL_CHIKI
 3678               0x1CD0,   // 1CD0..1CD2; INHERITED
 3679               0x1CD3,   // 1CD3..1CD3; COMMON
 3680               0x1CD4,   // 1CD4..1CE0; INHERITED
 3681               0x1CE1,   // 1CE1..1CE1; COMMON
 3682               0x1CE2,   // 1CE2..1CE8; INHERITED
 3683               0x1CE9,   // 1CE9..1CEC; COMMON
 3684               0x1CED,   // 1CED..1CED; INHERITED
 3685               0x1CEE,   // 1CEE..1CFF; COMMON
 3686               0x1D00,   // 1D00..1D25; LATIN
 3687               0x1D26,   // 1D26..1D2A; GREEK
 3688               0x1D2B,   // 1D2B..1D2B; CYRILLIC
 3689               0x1D2C,   // 1D2C..1D5C; LATIN
 3690               0x1D5D,   // 1D5D..1D61; GREEK
 3691               0x1D62,   // 1D62..1D65; LATIN
 3692               0x1D66,   // 1D66..1D6A; GREEK
 3693               0x1D6B,   // 1D6B..1D77; LATIN
 3694               0x1D78,   // 1D78..1D78; CYRILLIC
 3695               0x1D79,   // 1D79..1DBE; LATIN
 3696               0x1DBF,   // 1DBF..1DBF; GREEK
 3697               0x1DC0,   // 1DC0..1DFF; INHERITED
 3698               0x1E00,   // 1E00..1EFF; LATIN
 3699               0x1F00,   // 1F00..1FFF; GREEK
 3700               0x2000,   // 2000..200B; COMMON
 3701               0x200C,   // 200C..200D; INHERITED
 3702               0x200E,   // 200E..2070; COMMON
 3703               0x2071,   // 2071..2073; LATIN
 3704               0x2074,   // 2074..207E; COMMON
 3705               0x207F,   // 207F..207F; LATIN
 3706               0x2080,   // 2080..208F; COMMON
 3707               0x2090,   // 2090..209F; LATIN
 3708               0x20A0,   // 20A0..20CF; COMMON
 3709               0x20D0,   // 20D0..20FF; INHERITED
 3710               0x2100,   // 2100..2125; COMMON
 3711               0x2126,   // 2126..2126; GREEK
 3712               0x2127,   // 2127..2129; COMMON
 3713               0x212A,   // 212A..212B; LATIN
 3714               0x212C,   // 212C..2131; COMMON
 3715               0x2132,   // 2132..2132; LATIN
 3716               0x2133,   // 2133..214D; COMMON
 3717               0x214E,   // 214E..214E; LATIN
 3718               0x214F,   // 214F..215F; COMMON
 3719               0x2160,   // 2160..2188; LATIN
 3720               0x2189,   // 2189..27FF; COMMON
 3721               0x2800,   // 2800..28FF; BRAILLE
 3722               0x2900,   // 2900..2BFF; COMMON
 3723               0x2C00,   // 2C00..2C5F; GLAGOLITIC
 3724               0x2C60,   // 2C60..2C7F; LATIN
 3725               0x2C80,   // 2C80..2CFF; COPTIC
 3726               0x2D00,   // 2D00..2D2F; GEORGIAN
 3727               0x2D30,   // 2D30..2D7F; TIFINAGH
 3728               0x2D80,   // 2D80..2DDF; ETHIOPIC
 3729               0x2DE0,   // 2DE0..2DFF; CYRILLIC
 3730               0x2E00,   // 2E00..2E7F; COMMON
 3731               0x2E80,   // 2E80..2FEF; HAN
 3732               0x2FF0,   // 2FF0..3004; COMMON
 3733               0x3005,   // 3005..3005; HAN
 3734               0x3006,   // 3006..3006; COMMON
 3735               0x3007,   // 3007..3007; HAN
 3736               0x3008,   // 3008..3020; COMMON
 3737               0x3021,   // 3021..3029; HAN
 3738               0x302A,   // 302A..302D; INHERITED
 3739               0x302E,   // 302E..302F; HANGUL
 3740               0x3030,   // 3030..3037; COMMON
 3741               0x3038,   // 3038..303B; HAN
 3742               0x303C,   // 303C..3040; COMMON
 3743               0x3041,   // 3041..3098; HIRAGANA
 3744               0x3099,   // 3099..309A; INHERITED
 3745               0x309B,   // 309B..309C; COMMON
 3746               0x309D,   // 309D..309F; HIRAGANA
 3747               0x30A0,   // 30A0..30A0; COMMON
 3748               0x30A1,   // 30A1..30FA; KATAKANA
 3749               0x30FB,   // 30FB..30FC; COMMON
 3750               0x30FD,   // 30FD..3104; KATAKANA
 3751               0x3105,   // 3105..3130; BOPOMOFO
 3752               0x3131,   // 3131..318F; HANGUL
 3753               0x3190,   // 3190..319F; COMMON
 3754               0x31A0,   // 31A0..31BF; BOPOMOFO
 3755               0x31C0,   // 31C0..31EF; COMMON
 3756               0x31F0,   // 31F0..31FF; KATAKANA
 3757               0x3200,   // 3200..321F; HANGUL
 3758               0x3220,   // 3220..325F; COMMON
 3759               0x3260,   // 3260..327E; HANGUL
 3760               0x327F,   // 327F..32CF; COMMON
 3761               0x32D0,   // 32D0..3357; KATAKANA
 3762               0x3358,   // 3358..33FF; COMMON
 3763               0x3400,   // 3400..4DBF; HAN
 3764               0x4DC0,   // 4DC0..4DFF; COMMON
 3765               0x4E00,   // 4E00..9FFF; HAN
 3766               0xA000,   // A000..A4CF; YI
 3767               0xA4D0,   // A4D0..A4FF; LISU
 3768               0xA500,   // A500..A63F; VAI
 3769               0xA640,   // A640..A69F; CYRILLIC
 3770               0xA6A0,   // A6A0..A6FF; BAMUM
 3771               0xA700,   // A700..A721; COMMON
 3772               0xA722,   // A722..A787; LATIN
 3773               0xA788,   // A788..A78A; COMMON
 3774               0xA78B,   // A78B..A7FF; LATIN
 3775               0xA800,   // A800..A82F; SYLOTI_NAGRI
 3776               0xA830,   // A830..A83F; COMMON
 3777               0xA840,   // A840..A87F; PHAGS_PA
 3778               0xA880,   // A880..A8DF; SAURASHTRA
 3779               0xA8E0,   // A8E0..A8FF; DEVANAGARI
 3780               0xA900,   // A900..A92F; KAYAH_LI
 3781               0xA930,   // A930..A95F; REJANG
 3782               0xA960,   // A960..A97F; HANGUL
 3783               0xA980,   // A980..A9FF; JAVANESE
 3784               0xAA00,   // AA00..AA5F; CHAM
 3785               0xAA60,   // AA60..AA7F; MYANMAR
 3786               0xAA80,   // AA80..AB00; TAI_VIET
 3787               0xAB01,   // AB01..ABBF; ETHIOPIC
 3788               0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
 3789               0xAC00,   // AC00..D7FB; HANGUL
 3790               0xD7FC,   // D7FC..F8FF; UNKNOWN
 3791               0xF900,   // F900..FAFF; HAN
 3792               0xFB00,   // FB00..FB12; LATIN
 3793               0xFB13,   // FB13..FB1C; ARMENIAN
 3794               0xFB1D,   // FB1D..FB4F; HEBREW
 3795               0xFB50,   // FB50..FD3D; ARABIC
 3796               0xFD3E,   // FD3E..FD4F; COMMON
 3797               0xFD50,   // FD50..FDFC; ARABIC
 3798               0xFDFD,   // FDFD..FDFF; COMMON
 3799               0xFE00,   // FE00..FE0F; INHERITED
 3800               0xFE10,   // FE10..FE1F; COMMON
 3801               0xFE20,   // FE20..FE2F; INHERITED
 3802               0xFE30,   // FE30..FE6F; COMMON
 3803               0xFE70,   // FE70..FEFE; ARABIC
 3804               0xFEFF,   // FEFF..FF20; COMMON
 3805               0xFF21,   // FF21..FF3A; LATIN
 3806               0xFF3B,   // FF3B..FF40; COMMON
 3807               0xFF41,   // FF41..FF5A; LATIN
 3808               0xFF5B,   // FF5B..FF65; COMMON
 3809               0xFF66,   // FF66..FF6F; KATAKANA
 3810               0xFF70,   // FF70..FF70; COMMON
 3811               0xFF71,   // FF71..FF9D; KATAKANA
 3812               0xFF9E,   // FF9E..FF9F; COMMON
 3813               0xFFA0,   // FFA0..FFDF; HANGUL
 3814               0xFFE0,   // FFE0..FFFF; COMMON
 3815               0x10000,  // 10000..100FF; LINEAR_B
 3816               0x10100,  // 10100..1013F; COMMON
 3817               0x10140,  // 10140..1018F; GREEK
 3818               0x10190,  // 10190..101FC; COMMON
 3819               0x101FD,  // 101FD..1027F; INHERITED
 3820               0x10280,  // 10280..1029F; LYCIAN
 3821               0x102A0,  // 102A0..102FF; CARIAN
 3822               0x10300,  // 10300..1032F; OLD_ITALIC
 3823               0x10330,  // 10330..1037F; GOTHIC
 3824               0x10380,  // 10380..1039F; UGARITIC
 3825               0x103A0,  // 103A0..103FF; OLD_PERSIAN
 3826               0x10400,  // 10400..1044F; DESERET
 3827               0x10450,  // 10450..1047F; SHAVIAN
 3828               0x10480,  // 10480..107FF; OSMANYA
 3829               0x10800,  // 10800..1083F; CYPRIOT
 3830               0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
 3831               0x10900,  // 10900..1091F; PHOENICIAN
 3832               0x10920,  // 10920..109FF; LYDIAN
 3833               0x10A00,  // 10A00..10A5F; KHAROSHTHI
 3834               0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
 3835               0x10B00,  // 10B00..10B3F; AVESTAN
 3836               0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
 3837               0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
 3838               0x10C00,  // 10C00..10E5F; OLD_TURKIC
 3839               0x10E60,  // 10E60..10FFF; ARABIC
 3840               0x11000,  // 11000..1107F; BRAHMI
 3841               0x11080,  // 11080..11FFF; KAITHI
 3842               0x12000,  // 12000..12FFF; CUNEIFORM
 3843               0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
 3844               0x16800,  // 16800..16A38; BAMUM
 3845               0x1B000,  // 1B000..1B000; KATAKANA
 3846               0x1B001,  // 1B001..1CFFF; HIRAGANA
 3847               0x1D000,  // 1D000..1D166; COMMON
 3848               0x1D167,  // 1D167..1D169; INHERITED
 3849               0x1D16A,  // 1D16A..1D17A; COMMON
 3850               0x1D17B,  // 1D17B..1D182; INHERITED
 3851               0x1D183,  // 1D183..1D184; COMMON
 3852               0x1D185,  // 1D185..1D18B; INHERITED
 3853               0x1D18C,  // 1D18C..1D1A9; COMMON
 3854               0x1D1AA,  // 1D1AA..1D1AD; INHERITED
 3855               0x1D1AE,  // 1D1AE..1D1FF; COMMON
 3856               0x1D200,  // 1D200..1D2FF; GREEK
 3857               0x1D300,  // 1D300..1F1FF; COMMON
 3858               0x1F200,  // 1F200..1F200; HIRAGANA
 3859               0x1F201,  // 1F201..1FFFF; COMMON
 3860               0x20000,  // 20000..E0000; HAN
 3861               0xE0001,  // E0001..E00FF; COMMON
 3862               0xE0100,  // E0100..E01EF; INHERITED
 3863               0xE01F0   // E01F0..10FFFF; UNKNOWN
 3864   
 3865           };
 3866   
 3867           private static final UnicodeScript[] scripts = {
 3868               COMMON,
 3869               LATIN,
 3870               COMMON,
 3871               LATIN,
 3872               COMMON,
 3873               LATIN,
 3874               COMMON,
 3875               LATIN,
 3876               COMMON,
 3877               LATIN,
 3878               COMMON,
 3879               LATIN,
 3880               COMMON,
 3881               LATIN,
 3882               COMMON,
 3883               LATIN,
 3884               COMMON,
 3885               BOPOMOFO,
 3886               COMMON,
 3887               INHERITED,
 3888               GREEK,
 3889               COMMON,
 3890               GREEK,
 3891               COMMON,
 3892               GREEK,
 3893               COMMON,
 3894               GREEK,
 3895               COMMON,
 3896               GREEK,
 3897               COPTIC,
 3898               GREEK,
 3899               CYRILLIC,
 3900               INHERITED,
 3901               CYRILLIC,
 3902               ARMENIAN,
 3903               COMMON,
 3904               ARMENIAN,
 3905               HEBREW,
 3906               ARABIC,
 3907               COMMON,
 3908               ARABIC,
 3909               COMMON,
 3910               ARABIC,
 3911               COMMON,
 3912               ARABIC,
 3913               COMMON,
 3914               ARABIC,
 3915               INHERITED,
 3916               ARABIC,
 3917               INHERITED,
 3918               COMMON,
 3919               ARABIC,
 3920               INHERITED,
 3921               ARABIC,
 3922               COMMON,
 3923               ARABIC,
 3924               SYRIAC,
 3925               ARABIC,
 3926               THAANA,
 3927               NKO,
 3928               SAMARITAN,
 3929               MANDAIC,
 3930               DEVANAGARI,
 3931               INHERITED,
 3932               DEVANAGARI,
 3933               COMMON,
 3934               DEVANAGARI,
 3935               COMMON,
 3936               DEVANAGARI,
 3937               BENGALI,
 3938               GURMUKHI,
 3939               GUJARATI,
 3940               ORIYA,
 3941               TAMIL,
 3942               TELUGU,
 3943               KANNADA,
 3944               MALAYALAM,
 3945               SINHALA,
 3946               THAI,
 3947               COMMON,
 3948               THAI,
 3949               LAO,
 3950               TIBETAN,
 3951               COMMON,
 3952               TIBETAN,
 3953               MYANMAR,
 3954               GEORGIAN,
 3955               COMMON,
 3956               GEORGIAN,
 3957               HANGUL,
 3958               ETHIOPIC,
 3959               CHEROKEE,
 3960               CANADIAN_ABORIGINAL,
 3961               OGHAM,
 3962               RUNIC,
 3963               COMMON,
 3964               RUNIC,
 3965               TAGALOG,
 3966               HANUNOO,
 3967               COMMON,
 3968               BUHID,
 3969               TAGBANWA,
 3970               KHMER,
 3971               MONGOLIAN,
 3972               COMMON,
 3973               MONGOLIAN,
 3974               COMMON,
 3975               MONGOLIAN,
 3976               CANADIAN_ABORIGINAL,
 3977               LIMBU,
 3978               TAI_LE,
 3979               NEW_TAI_LUE,
 3980               KHMER,
 3981               BUGINESE,
 3982               TAI_THAM,
 3983               BALINESE,
 3984               SUNDANESE,
 3985               BATAK,
 3986               LEPCHA,
 3987               OL_CHIKI,
 3988               INHERITED,
 3989               COMMON,
 3990               INHERITED,
 3991               COMMON,
 3992               INHERITED,
 3993               COMMON,
 3994               INHERITED,
 3995               COMMON,
 3996               LATIN,
 3997               GREEK,
 3998               CYRILLIC,
 3999               LATIN,
 4000               GREEK,
 4001               LATIN,
 4002               GREEK,
 4003               LATIN,
 4004               CYRILLIC,
 4005               LATIN,
 4006               GREEK,
 4007               INHERITED,
 4008               LATIN,
 4009               GREEK,
 4010               COMMON,
 4011               INHERITED,
 4012               COMMON,
 4013               LATIN,
 4014               COMMON,
 4015               LATIN,
 4016               COMMON,
 4017               LATIN,
 4018               COMMON,
 4019               INHERITED,
 4020               COMMON,
 4021               GREEK,
 4022               COMMON,
 4023               LATIN,
 4024               COMMON,
 4025               LATIN,
 4026               COMMON,
 4027               LATIN,
 4028               COMMON,
 4029               LATIN,
 4030               COMMON,
 4031               BRAILLE,
 4032               COMMON,
 4033               GLAGOLITIC,
 4034               LATIN,
 4035               COPTIC,
 4036               GEORGIAN,
 4037               TIFINAGH,
 4038               ETHIOPIC,
 4039               CYRILLIC,
 4040               COMMON,
 4041               HAN,
 4042               COMMON,
 4043               HAN,
 4044               COMMON,
 4045               HAN,
 4046               COMMON,
 4047               HAN,
 4048               INHERITED,
 4049               HANGUL,
 4050               COMMON,
 4051               HAN,
 4052               COMMON,
 4053               HIRAGANA,
 4054               INHERITED,
 4055               COMMON,
 4056               HIRAGANA,
 4057               COMMON,
 4058               KATAKANA,
 4059               COMMON,
 4060               KATAKANA,
 4061               BOPOMOFO,
 4062               HANGUL,
 4063               COMMON,
 4064               BOPOMOFO,
 4065               COMMON,
 4066               KATAKANA,
 4067               HANGUL,
 4068               COMMON,
 4069               HANGUL,
 4070               COMMON,
 4071               KATAKANA,
 4072               COMMON,
 4073               HAN,
 4074               COMMON,
 4075               HAN,
 4076               YI,
 4077               LISU,
 4078               VAI,
 4079               CYRILLIC,
 4080               BAMUM,
 4081               COMMON,
 4082               LATIN,
 4083               COMMON,
 4084               LATIN,
 4085               SYLOTI_NAGRI,
 4086               COMMON,
 4087               PHAGS_PA,
 4088               SAURASHTRA,
 4089               DEVANAGARI,
 4090               KAYAH_LI,
 4091               REJANG,
 4092               HANGUL,
 4093               JAVANESE,
 4094               CHAM,
 4095               MYANMAR,
 4096               TAI_VIET,
 4097               ETHIOPIC,
 4098               MEETEI_MAYEK,
 4099               HANGUL,
 4100               UNKNOWN,
 4101               HAN,
 4102               LATIN,
 4103               ARMENIAN,
 4104               HEBREW,
 4105               ARABIC,
 4106               COMMON,
 4107               ARABIC,
 4108               COMMON,
 4109               INHERITED,
 4110               COMMON,
 4111               INHERITED,
 4112               COMMON,
 4113               ARABIC,
 4114               COMMON,
 4115               LATIN,
 4116               COMMON,
 4117               LATIN,
 4118               COMMON,
 4119               KATAKANA,
 4120               COMMON,
 4121               KATAKANA,
 4122               COMMON,
 4123               HANGUL,
 4124               COMMON,
 4125               LINEAR_B,
 4126               COMMON,
 4127               GREEK,
 4128               COMMON,
 4129               INHERITED,
 4130               LYCIAN,
 4131               CARIAN,
 4132               OLD_ITALIC,
 4133               GOTHIC,
 4134               UGARITIC,
 4135               OLD_PERSIAN,
 4136               DESERET,
 4137               SHAVIAN,
 4138               OSMANYA,
 4139               CYPRIOT,
 4140               IMPERIAL_ARAMAIC,
 4141               PHOENICIAN,
 4142               LYDIAN,
 4143               KHAROSHTHI,
 4144               OLD_SOUTH_ARABIAN,
 4145               AVESTAN,
 4146               INSCRIPTIONAL_PARTHIAN,
 4147               INSCRIPTIONAL_PAHLAVI,
 4148               OLD_TURKIC,
 4149               ARABIC,
 4150               BRAHMI,
 4151               KAITHI,
 4152               CUNEIFORM,
 4153               EGYPTIAN_HIEROGLYPHS,
 4154               BAMUM,
 4155               KATAKANA,
 4156               HIRAGANA,
 4157               COMMON,
 4158               INHERITED,
 4159               COMMON,
 4160               INHERITED,
 4161               COMMON,
 4162               INHERITED,
 4163               COMMON,
 4164               INHERITED,
 4165               COMMON,
 4166               GREEK,
 4167               COMMON,
 4168               HIRAGANA,
 4169               COMMON,
 4170               HAN,
 4171               COMMON,
 4172               INHERITED,
 4173               UNKNOWN
 4174           };
 4175   
 4176           private static HashMap<String, Character.UnicodeScript> aliases;
 4177           static {
 4178               aliases = new HashMap<>(128);
 4179               aliases.put("ARAB", ARABIC);
 4180               aliases.put("ARMI", IMPERIAL_ARAMAIC);
 4181               aliases.put("ARMN", ARMENIAN);
 4182               aliases.put("AVST", AVESTAN);
 4183               aliases.put("BALI", BALINESE);
 4184               aliases.put("BAMU", BAMUM);
 4185               aliases.put("BATK", BATAK);
 4186               aliases.put("BENG", BENGALI);
 4187               aliases.put("BOPO", BOPOMOFO);
 4188               aliases.put("BRAI", BRAILLE);
 4189               aliases.put("BRAH", BRAHMI);
 4190               aliases.put("BUGI", BUGINESE);
 4191               aliases.put("BUHD", BUHID);
 4192               aliases.put("CANS", CANADIAN_ABORIGINAL);
 4193               aliases.put("CARI", CARIAN);
 4194               aliases.put("CHAM", CHAM);
 4195               aliases.put("CHER", CHEROKEE);
 4196               aliases.put("COPT", COPTIC);
 4197               aliases.put("CPRT", CYPRIOT);
 4198               aliases.put("CYRL", CYRILLIC);
 4199               aliases.put("DEVA", DEVANAGARI);
 4200               aliases.put("DSRT", DESERET);
 4201               aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
 4202               aliases.put("ETHI", ETHIOPIC);
 4203               aliases.put("GEOR", GEORGIAN);
 4204               aliases.put("GLAG", GLAGOLITIC);
 4205               aliases.put("GOTH", GOTHIC);
 4206               aliases.put("GREK", GREEK);
 4207               aliases.put("GUJR", GUJARATI);
 4208               aliases.put("GURU", GURMUKHI);
 4209               aliases.put("HANG", HANGUL);
 4210               aliases.put("HANI", HAN);
 4211               aliases.put("HANO", HANUNOO);
 4212               aliases.put("HEBR", HEBREW);
 4213               aliases.put("HIRA", HIRAGANA);
 4214               // it appears we don't have the KATAKANA_OR_HIRAGANA
 4215               //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
 4216               aliases.put("ITAL", OLD_ITALIC);
 4217               aliases.put("JAVA", JAVANESE);
 4218               aliases.put("KALI", KAYAH_LI);
 4219               aliases.put("KANA", KATAKANA);
 4220               aliases.put("KHAR", KHAROSHTHI);
 4221               aliases.put("KHMR", KHMER);
 4222               aliases.put("KNDA", KANNADA);
 4223               aliases.put("KTHI", KAITHI);
 4224               aliases.put("LANA", TAI_THAM);
 4225               aliases.put("LAOO", LAO);
 4226               aliases.put("LATN", LATIN);
 4227               aliases.put("LEPC", LEPCHA);
 4228               aliases.put("LIMB", LIMBU);
 4229               aliases.put("LINB", LINEAR_B);
 4230               aliases.put("LISU", LISU);
 4231               aliases.put("LYCI", LYCIAN);
 4232               aliases.put("LYDI", LYDIAN);
 4233               aliases.put("MAND", MANDAIC);
 4234               aliases.put("MLYM", MALAYALAM);
 4235               aliases.put("MONG", MONGOLIAN);
 4236               aliases.put("MTEI", MEETEI_MAYEK);
 4237               aliases.put("MYMR", MYANMAR);
 4238               aliases.put("NKOO", NKO);
 4239               aliases.put("OGAM", OGHAM);
 4240               aliases.put("OLCK", OL_CHIKI);
 4241               aliases.put("ORKH", OLD_TURKIC);
 4242               aliases.put("ORYA", ORIYA);
 4243               aliases.put("OSMA", OSMANYA);
 4244               aliases.put("PHAG", PHAGS_PA);
 4245               aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
 4246               aliases.put("PHNX", PHOENICIAN);
 4247               aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
 4248               aliases.put("RJNG", REJANG);
 4249               aliases.put("RUNR", RUNIC);
 4250               aliases.put("SAMR", SAMARITAN);
 4251               aliases.put("SARB", OLD_SOUTH_ARABIAN);
 4252               aliases.put("SAUR", SAURASHTRA);
 4253               aliases.put("SHAW", SHAVIAN);
 4254               aliases.put("SINH", SINHALA);
 4255               aliases.put("SUND", SUNDANESE);
 4256               aliases.put("SYLO", SYLOTI_NAGRI);
 4257               aliases.put("SYRC", SYRIAC);
 4258               aliases.put("TAGB", TAGBANWA);
 4259               aliases.put("TALE", TAI_LE);
 4260               aliases.put("TALU", NEW_TAI_LUE);
 4261               aliases.put("TAML", TAMIL);
 4262               aliases.put("TAVT", TAI_VIET);
 4263               aliases.put("TELU", TELUGU);
 4264               aliases.put("TFNG", TIFINAGH);
 4265               aliases.put("TGLG", TAGALOG);
 4266               aliases.put("THAA", THAANA);
 4267               aliases.put("THAI", THAI);
 4268               aliases.put("TIBT", TIBETAN);
 4269               aliases.put("UGAR", UGARITIC);
 4270               aliases.put("VAII", VAI);
 4271               aliases.put("XPEO", OLD_PERSIAN);
 4272               aliases.put("XSUX", CUNEIFORM);
 4273               aliases.put("YIII", YI);
 4274               aliases.put("ZINH", INHERITED);
 4275               aliases.put("ZYYY", COMMON);
 4276               aliases.put("ZZZZ", UNKNOWN);
 4277           }
 4278   
 4279           /**
 4280            * Returns the enum constant representing the Unicode script of which
 4281            * the given character (Unicode code point) is assigned to.
 4282            *
 4283            * @param   codePoint the character (Unicode code point) in question.
 4284            * @return  The {@code UnicodeScript} constant representing the
 4285            *          Unicode script of which this character is assigned to.
 4286            *
 4287            * @exception IllegalArgumentException if the specified
 4288            * {@code codePoint} is an invalid Unicode code point.
 4289            * @see Character#isValidCodePoint(int)
 4290            *
 4291            */
 4292           public static UnicodeScript of(int codePoint) {
 4293               if (!isValidCodePoint(codePoint))
 4294                   throw new IllegalArgumentException();
 4295               int type = getType(codePoint);
 4296               // leave SURROGATE and PRIVATE_USE for table lookup
 4297               if (type == UNASSIGNED)
 4298                   return UNKNOWN;
 4299               int index = Arrays.binarySearch(scriptStarts, codePoint);
 4300               if (index < 0)
 4301                   index = -index - 2;
 4302               return scripts[index];
 4303           }
 4304   
 4305           /**
 4306            * Returns the UnicodeScript constant with the given Unicode script
 4307            * name or the script name alias. Script names and their aliases are
 4308            * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
 4309            * and PropertyValueAliases&lt;version&gt;.txt define script names
 4310            * and the script name aliases for a particular version of the
 4311            * standard. The {@link Character} class specifies the version of
 4312            * the standard that it supports.
 4313            * <p>
 4314            * Character case is ignored for all of the valid script names.
 4315            * The en_US locale's case mapping rules are used to provide
 4316            * case-insensitive string comparisons for script name validation.
 4317            * <p>
 4318            *
 4319            * @param scriptName A {@code UnicodeScript} name.
 4320            * @return The {@code UnicodeScript} constant identified
 4321            *         by {@code scriptName}
 4322            * @throws IllegalArgumentException if {@code scriptName} is an
 4323            *         invalid name
 4324            * @throws NullPointerException if {@code scriptName} is null
 4325            */
 4326           public static final UnicodeScript forName(String scriptName) {
 4327               scriptName = scriptName.toUpperCase(Locale.ENGLISH);
 4328                                    //.replace(' ', '_'));
 4329               UnicodeScript sc = aliases.get(scriptName);
 4330               if (sc != null)
 4331                   return sc;
 4332               return valueOf(scriptName);
 4333           }
 4334       }
 4335   
 4336       /**
 4337        * The value of the {@code Character}.
 4338        *
 4339        * @serial
 4340        */
 4341       private final char value;
 4342   
 4343       /** use serialVersionUID from JDK 1.0.2 for interoperability */
 4344       private static final long serialVersionUID = 3786198910865385080L;
 4345   
 4346       /**
 4347        * Constructs a newly allocated {@code Character} object that
 4348        * represents the specified {@code char} value.
 4349        *
 4350        * @param  value   the value to be represented by the
 4351        *                  {@code Character} object.
 4352        */
 4353       public Character(char value) {
 4354           this.value = value;
 4355       }
 4356   
 4357       private static class CharacterCache {
 4358           private CharacterCache(){}
 4359   
 4360           static final Character cache[] = new Character[127 + 1];
 4361   
 4362           static {
 4363               for (int i = 0; i < cache.length; i++)
 4364                   cache[i] = new Character((char)i);
 4365           }
 4366       }
 4367   
 4368       /**
 4369        * Returns a <tt>Character</tt> instance representing the specified
 4370        * <tt>char</tt> value.
 4371        * If a new <tt>Character</tt> instance is not required, this method
 4372        * should generally be used in preference to the constructor
 4373        * {@link #Character(char)}, as this method is likely to yield
 4374        * significantly better space and time performance by caching
 4375        * frequently requested values.
 4376        *
 4377        * This method will always cache values in the range {@code
 4378        * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
 4379        * cache other values outside of this range.
 4380        *
 4381        * @param  c a char value.
 4382        * @return a <tt>Character</tt> instance representing <tt>c</tt>.
 4383        * @since  1.5
 4384        */
 4385       public static Character valueOf(char c) {
 4386           if (c <= 127) { // must cache
 4387               return CharacterCache.cache[(int)c];
 4388           }
 4389           return new Character(c);
 4390       }
 4391   
 4392       /**
 4393        * Returns the value of this {@code Character} object.
 4394        * @return  the primitive {@code char} value represented by
 4395        *          this object.
 4396        */
 4397       public char charValue() {
 4398           return value;
 4399       }
 4400   
 4401       /**
 4402        * Returns a hash code for this {@code Character}; equal to the result
 4403        * of invoking {@code charValue()}.
 4404        *
 4405        * @return a hash code value for this {@code Character}
 4406        */
 4407       public int hashCode() {
 4408           return (int)value;
 4409       }
 4410   
 4411       /**
 4412        * Compares this object against the specified object.
 4413        * The result is {@code true} if and only if the argument is not
 4414        * {@code null} and is a {@code Character} object that
 4415        * represents the same {@code char} value as this object.
 4416        *
 4417        * @param   obj   the object to compare with.
 4418        * @return  {@code true} if the objects are the same;
 4419        *          {@code false} otherwise.
 4420        */
 4421       public boolean equals(Object obj) {
 4422           if (obj instanceof Character) {
 4423               return value == ((Character)obj).charValue();
 4424           }
 4425           return false;
 4426       }
 4427   
 4428       /**
 4429        * Returns a {@code String} object representing this
 4430        * {@code Character}'s value.  The result is a string of
 4431        * length 1 whose sole component is the primitive
 4432        * {@code char} value represented by this
 4433        * {@code Character} object.
 4434        *
 4435        * @return  a string representation of this object.
 4436        */
 4437       public String toString() {
 4438           char buf[] = {value};
 4439           return String.valueOf(buf);
 4440       }
 4441   
 4442       /**
 4443        * Returns a {@code String} object representing the
 4444        * specified {@code char}.  The result is a string of length
 4445        * 1 consisting solely of the specified {@code char}.
 4446        *
 4447        * @param c the {@code char} to be converted
 4448        * @return the string representation of the specified {@code char}
 4449        * @since 1.4
 4450        */
 4451       public static String toString(char c) {
 4452           return String.valueOf(c);
 4453       }
 4454   
 4455       /**
 4456        * Determines whether the specified code point is a valid
 4457        * <a href="http://www.unicode.org/glossary/#code_point">
 4458        * Unicode code point value</a>.
 4459        *
 4460        * @param  codePoint the Unicode code point to be tested
 4461        * @return {@code true} if the specified code point value is between
 4462        *         {@link #MIN_CODE_POINT} and
 4463        *         {@link #MAX_CODE_POINT} inclusive;
 4464        *         {@code false} otherwise.
 4465        * @since  1.5
 4466        */
 4467       public static boolean isValidCodePoint(int codePoint) {
 4468           // Optimized form of:
 4469           //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
 4470           int plane = codePoint >>> 16;
 4471           return plane < ((MAX_CODE_POINT + 1) >>> 16);
 4472       }
 4473   
 4474       /**
 4475        * Determines whether the specified character (Unicode code point)
 4476        * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
 4477        * Such code points can be represented using a single {@code char}.
 4478        *
 4479        * @param  codePoint the character (Unicode code point) to be tested
 4480        * @return {@code true} if the specified code point is between
 4481        *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
 4482        *         {@code false} otherwise.
 4483        * @since  1.7
 4484        */
 4485       public static boolean isBmpCodePoint(int codePoint) {
 4486           return codePoint >>> 16 == 0;
 4487           // Optimized form of:
 4488           //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
 4489           // We consistently use logical shift (>>>) to facilitate
 4490           // additional runtime optimizations.
 4491       }
 4492   
 4493       /**
 4494        * Determines whether the specified character (Unicode code point)
 4495        * is in the <a href="#supplementary">supplementary character</a> range.
 4496        *
 4497        * @param  codePoint the character (Unicode code point) to be tested
 4498        * @return {@code true} if the specified code point is between
 4499        *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
 4500        *         {@link #MAX_CODE_POINT} inclusive;
 4501        *         {@code false} otherwise.
 4502        * @since  1.5
 4503        */
 4504       public static boolean isSupplementaryCodePoint(int codePoint) {
 4505           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
 4506               && codePoint <  MAX_CODE_POINT + 1;
 4507       }
 4508   
 4509       /**
 4510        * Determines if the given {@code char} value is a
 4511        * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 4512        * Unicode high-surrogate code unit</a>
 4513        * (also known as <i>leading-surrogate code unit</i>).
 4514        *
 4515        * <p>Such values do not represent characters by themselves,
 4516        * but are used in the representation of
 4517        * <a href="#supplementary">supplementary characters</a>
 4518        * in the UTF-16 encoding.
 4519        *
 4520        * @param  ch the {@code char} value to be tested.
 4521        * @return {@code true} if the {@code char} value is between
 4522        *         {@link #MIN_HIGH_SURROGATE} and
 4523        *         {@link #MAX_HIGH_SURROGATE} inclusive;
 4524        *         {@code false} otherwise.
 4525        * @see    Character#isLowSurrogate(char)
 4526        * @see    Character.UnicodeBlock#of(int)
 4527        * @since  1.5
 4528        */
 4529       public static boolean isHighSurrogate(char ch) {
 4530           // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
 4531           return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
 4532       }
 4533   
 4534       /**
 4535        * Determines if the given {@code char} value is a
 4536        * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 4537        * Unicode low-surrogate code unit</a>
 4538        * (also known as <i>trailing-surrogate code unit</i>).
 4539        *
 4540        * <p>Such values do not represent characters by themselves,
 4541        * but are used in the representation of
 4542        * <a href="#supplementary">supplementary characters</a>
 4543        * in the UTF-16 encoding.
 4544        *
 4545        * @param  ch the {@code char} value to be tested.
 4546        * @return {@code true} if the {@code char} value is between
 4547        *         {@link #MIN_LOW_SURROGATE} and
 4548        *         {@link #MAX_LOW_SURROGATE} inclusive;
 4549        *         {@code false} otherwise.
 4550        * @see    Character#isHighSurrogate(char)
 4551        * @since  1.5
 4552        */
 4553       public static boolean isLowSurrogate(char ch) {
 4554           return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
 4555       }
 4556   
 4557       /**
 4558        * Determines if the given {@code char} value is a Unicode
 4559        * <i>surrogate code unit</i>.
 4560        *
 4561        * <p>Such values do not represent characters by themselves,
 4562        * but are used in the representation of
 4563        * <a href="#supplementary">supplementary characters</a>
 4564        * in the UTF-16 encoding.
 4565        *
 4566        * <p>A char value is a surrogate code unit if and only if it is either
 4567        * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
 4568        * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
 4569        *
 4570        * @param  ch the {@code char} value to be tested.
 4571        * @return {@code true} if the {@code char} value is between
 4572        *         {@link #MIN_SURROGATE} and
 4573        *         {@link #MAX_SURROGATE} inclusive;
 4574        *         {@code false} otherwise.
 4575        * @since  1.7
 4576        */
 4577       public static boolean isSurrogate(char ch) {
 4578           return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
 4579       }
 4580   
 4581       /**
 4582        * Determines whether the specified pair of {@code char}
 4583        * values is a valid
 4584        * <a href="http://www.unicode.org/glossary/#surrogate_pair">
 4585        * Unicode surrogate pair</a>.
 4586   
 4587        * <p>This method is equivalent to the expression:
 4588        * <blockquote><pre>
 4589        * isHighSurrogate(high) && isLowSurrogate(low)
 4590        * </pre></blockquote>
 4591        *
 4592        * @param  high the high-surrogate code value to be tested
 4593        * @param  low the low-surrogate code value to be tested
 4594        * @return {@code true} if the specified high and
 4595        * low-surrogate code values represent a valid surrogate pair;
 4596        * {@code false} otherwise.
 4597        * @since  1.5
 4598        */
 4599       public static boolean isSurrogatePair(char high, char low) {
 4600           return isHighSurrogate(high) && isLowSurrogate(low);
 4601       }
 4602   
 4603       /**
 4604        * Determines the number of {@code char} values needed to
 4605        * represent the specified character (Unicode code point). If the
 4606        * specified character is equal to or greater than 0x10000, then
 4607        * the method returns 2. Otherwise, the method returns 1.
 4608        *
 4609        * <p>This method doesn't validate the specified character to be a
 4610        * valid Unicode code point. The caller must validate the
 4611        * character value using {@link #isValidCodePoint(int) isValidCodePoint}
 4612        * if necessary.
 4613        *
 4614        * @param   codePoint the character (Unicode code point) to be tested.
 4615        * @return  2 if the specified value is equal to or greater than 0x10000; 1 otherwise.
 4616        * @see     Character#isSupplementaryCodePoint(int)
 4617        * @since   1.5
 4618        */
 4619       public static int charCount(int codePoint) {
 4620           return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
 4621       }
 4622   
 4623       /**
 4624        * Converts the specified surrogate pair to its supplementary code
 4625        * point value. This method does not validate the specified
 4626        * surrogate pair. The caller must validate it using {@link
 4627        * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
 4628        *
 4629        * @param  high the high-surrogate code unit
 4630        * @param  low the low-surrogate code unit
 4631        * @return the supplementary code point composed from the
 4632        *         specified surrogate pair.
 4633        * @since  1.5
 4634        */
 4635       public static int toCodePoint(char high, char low) {
 4636           // Optimized form of:
 4637           // return ((high - MIN_HIGH_SURROGATE) << 10)
 4638           //         + (low - MIN_LOW_SURROGATE)
 4639           //         + MIN_SUPPLEMENTARY_CODE_POINT;
 4640           return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
 4641                                          - (MIN_HIGH_SURROGATE << 10)
 4642                                          - MIN_LOW_SURROGATE);
 4643       }
 4644   
 4645       /**
 4646        * Returns the code point at the given index of the
 4647        * {@code CharSequence}. If the {@code char} value at
 4648        * the given index in the {@code CharSequence} is in the
 4649        * high-surrogate range, the following index is less than the
 4650        * length of the {@code CharSequence}, and the
 4651        * {@code char} value at the following index is in the
 4652        * low-surrogate range, then the supplementary code point
 4653        * corresponding to this surrogate pair is returned. Otherwise,
 4654        * the {@code char} value at the given index is returned.
 4655        *
 4656        * @param seq a sequence of {@code char} values (Unicode code
 4657        * units)
 4658        * @param index the index to the {@code char} values (Unicode
 4659        * code units) in {@code seq} to be converted
 4660        * @return the Unicode code point at the given index
 4661        * @exception NullPointerException if {@code seq} is null.
 4662        * @exception IndexOutOfBoundsException if the value
 4663        * {@code index} is negative or not less than
 4664        * {@link CharSequence#length() seq.length()}.
 4665        * @since  1.5
 4666        */
 4667       public static int codePointAt(CharSequence seq, int index) {
 4668           char c1 = seq.charAt(index++);
 4669           if (isHighSurrogate(c1)) {
 4670               if (index < seq.length()) {
 4671                   char c2 = seq.charAt(index);
 4672                   if (isLowSurrogate(c2)) {
 4673                       return toCodePoint(c1, c2);
 4674                   }
 4675               }
 4676           }
 4677           return c1;
 4678       }
 4679   
 4680       /**
 4681        * Returns the code point at the given index of the
 4682        * {@code char} array. If the {@code char} value at
 4683        * the given index in the {@code char} array is in the
 4684        * high-surrogate range, the following index is less than the
 4685        * length of the {@code char} array, and the
 4686        * {@code char} value at the following index is in the
 4687        * low-surrogate range, then the supplementary code point
 4688        * corresponding to this surrogate pair is returned. Otherwise,
 4689        * the {@code char} value at the given index is returned.
 4690        *
 4691        * @param a the {@code char} array
 4692        * @param index the index to the {@code char} values (Unicode
 4693        * code units) in the {@code char} array to be converted
 4694        * @return the Unicode code point at the given index
 4695        * @exception NullPointerException if {@code a} is null.
 4696        * @exception IndexOutOfBoundsException if the value
 4697        * {@code index} is negative or not less than
 4698        * the length of the {@code char} array.
 4699        * @since  1.5
 4700        */
 4701       public static int codePointAt(char[] a, int index) {
 4702           return codePointAtImpl(a, index, a.length);
 4703       }
 4704   
 4705       /**
 4706        * Returns the code point at the given index of the
 4707        * {@code char} array, where only array elements with
 4708        * {@code index} less than {@code limit} can be used. If
 4709        * the {@code char} value at the given index in the
 4710        * {@code char} array is in the high-surrogate range, the
 4711        * following index is less than the {@code limit}, and the
 4712        * {@code char} value at the following index is in the
 4713        * low-surrogate range, then the supplementary code point
 4714        * corresponding to this surrogate pair is returned. Otherwise,
 4715        * the {@code char} value at the given index is returned.
 4716        *
 4717        * @param a the {@code char} array
 4718        * @param index the index to the {@code char} values (Unicode
 4719        * code units) in the {@code char} array to be converted
 4720        * @param limit the index after the last array element that
 4721        * can be used in the {@code char} array
 4722        * @return the Unicode code point at the given index
 4723        * @exception NullPointerException if {@code a} is null.
 4724        * @exception IndexOutOfBoundsException if the {@code index}
 4725        * argument is negative or not less than the {@code limit}
 4726        * argument, or if the {@code limit} argument is negative or
 4727        * greater than the length of the {@code char} array.
 4728        * @since  1.5
 4729        */
 4730       public static int codePointAt(char[] a, int index, int limit) {
 4731           if (index >= limit || limit < 0 || limit > a.length) {
 4732               throw new IndexOutOfBoundsException();
 4733           }
 4734           return codePointAtImpl(a, index, limit);
 4735       }
 4736   
 4737       // throws ArrayIndexOutOfBoundsException if index out of bounds
 4738       static int codePointAtImpl(char[] a, int index, int limit) {
 4739           char c1 = a[index++];
 4740           if (isHighSurrogate(c1)) {
 4741               if (index < limit) {
 4742                   char c2 = a[index];
 4743                   if (isLowSurrogate(c2)) {
 4744                       return toCodePoint(c1, c2);
 4745                   }
 4746               }
 4747           }
 4748           return c1;
 4749       }
 4750   
 4751       /**
 4752        * Returns the code point preceding the given index of the
 4753        * {@code CharSequence}. If the {@code char} value at
 4754        * {@code (index - 1)} in the {@code CharSequence} is in
 4755        * the low-surrogate range, {@code (index - 2)} is not
 4756        * negative, and the {@code char} value at {@code (index - 2)}
 4757        * in the {@code CharSequence} is in the
 4758        * high-surrogate range, then the supplementary code point
 4759        * corresponding to this surrogate pair is returned. Otherwise,
 4760        * the {@code char} value at {@code (index - 1)} is
 4761        * returned.
 4762        *
 4763        * @param seq the {@code CharSequence} instance
 4764        * @param index the index following the code point that should be returned
 4765        * @return the Unicode code point value before the given index.
 4766        * @exception NullPointerException if {@code seq} is null.
 4767        * @exception IndexOutOfBoundsException if the {@code index}
 4768        * argument is less than 1 or greater than {@link
 4769        * CharSequence#length() seq.length()}.
 4770        * @since  1.5
 4771        */
 4772       public static int codePointBefore(CharSequence seq, int index) {
 4773           char c2 = seq.charAt(--index);
 4774           if (isLowSurrogate(c2)) {
 4775               if (index > 0) {
 4776                   char c1 = seq.charAt(--index);
 4777                   if (isHighSurrogate(c1)) {
 4778                       return toCodePoint(c1, c2);
 4779                   }
 4780               }
 4781           }
 4782           return c2;
 4783       }
 4784   
 4785       /**
 4786        * Returns the code point preceding the given index of the
 4787        * {@code char} array. If the {@code char} value at
 4788        * {@code (index - 1)} in the {@code char} array is in
 4789        * the low-surrogate range, {@code (index - 2)} is not
 4790        * negative, and the {@code char} value at {@code (index - 2)}
 4791        * in the {@code char} array is in the
 4792        * high-surrogate range, then the supplementary code point
 4793        * corresponding to this surrogate pair is returned. Otherwise,
 4794        * the {@code char} value at {@code (index - 1)} is
 4795        * returned.
 4796        *
 4797        * @param a the {@code char} array
 4798        * @param index the index following the code point that should be returned
 4799        * @return the Unicode code point value before the given index.
 4800        * @exception NullPointerException if {@code a} is null.
 4801        * @exception IndexOutOfBoundsException if the {@code index}
 4802        * argument is less than 1 or greater than the length of the
 4803        * {@code char} array
 4804        * @since  1.5
 4805        */
 4806       public static int codePointBefore(char[] a, int index) {
 4807           return codePointBeforeImpl(a, index, 0);
 4808       }
 4809   
 4810       /**
 4811        * Returns the code point preceding the given index of the
 4812        * {@code char} array, where only array elements with
 4813        * {@code index} greater than or equal to {@code start}
 4814        * can be used. If the {@code char} value at {@code (index - 1)}
 4815        * in the {@code char} array is in the
 4816        * low-surrogate range, {@code (index - 2)} is not less than
 4817        * {@code start}, and the {@code char} value at
 4818        * {@code (index - 2)} in the {@code char} array is in
 4819        * the high-surrogate range, then the supplementary code point
 4820        * corresponding to this surrogate pair is returned. Otherwise,
 4821        * the {@code char} value at {@code (index - 1)} is
 4822        * returned.
 4823        *
 4824        * @param a the {@code char} array
 4825        * @param index the index following the code point that should be returned
 4826        * @param start the index of the first array element in the
 4827        * {@code char} array
 4828        * @return the Unicode code point value before the given index.
 4829        * @exception NullPointerException if {@code a} is null.
 4830        * @exception IndexOutOfBoundsException if the {@code index}
 4831        * argument is not greater than the {@code start} argument or
 4832        * is greater than the length of the {@code char} array, or
 4833        * if the {@code start} argument is negative or not less than
 4834        * the length of the {@code char} array.
 4835        * @since  1.5
 4836        */
 4837       public static int codePointBefore(char[] a, int index, int start) {
 4838           if (index <= start || start < 0 || start >= a.length) {
 4839               throw new IndexOutOfBoundsException();
 4840           }
 4841           return codePointBeforeImpl(a, index, start);
 4842       }
 4843   
 4844       // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
 4845       static int codePointBeforeImpl(char[] a, int index, int start) {
 4846           char c2 = a[--index];
 4847           if (isLowSurrogate(c2)) {
 4848               if (index > start) {
 4849                   char c1 = a[--index];
 4850                   if (isHighSurrogate(c1)) {
 4851                       return toCodePoint(c1, c2);
 4852                   }
 4853               }
 4854           }
 4855           return c2;
 4856       }
 4857   
 4858       /**
 4859        * Returns the leading surrogate (a
 4860        * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 4861        * high surrogate code unit</a>) of the
 4862        * <a href="http://www.unicode.org/glossary/#surrogate_pair">
 4863        * surrogate pair</a>
 4864        * representing the specified supplementary character (Unicode
 4865        * code point) in the UTF-16 encoding.  If the specified character
 4866        * is not a
 4867        * <a href="Character.html#supplementary">supplementary character</a>,
 4868        * an unspecified {@code char} is returned.
 4869        *
 4870        * <p>If
 4871        * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
 4872        * is {@code true}, then
 4873        * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
 4874        * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
 4875        * are also always {@code true}.
 4876        *
 4877        * @param   codePoint a supplementary character (Unicode code point)
 4878        * @return  the leading surrogate code unit used to represent the
 4879        *          character in the UTF-16 encoding
 4880        * @since   1.7
 4881        */
 4882       public static char highSurrogate(int codePoint) {
 4883           return (char) ((codePoint >>> 10)
 4884               + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
 4885       }
 4886   
 4887       /**
 4888        * Returns the trailing surrogate (a
 4889        * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 4890        * low surrogate code unit</a>) of the
 4891        * <a href="http://www.unicode.org/glossary/#surrogate_pair">
 4892        * surrogate pair</a>
 4893        * representing the specified supplementary character (Unicode
 4894        * code point) in the UTF-16 encoding.  If the specified character
 4895        * is not a
 4896        * <a href="Character.html#supplementary">supplementary character</a>,
 4897        * an unspecified {@code char} is returned.
 4898        *
 4899        * <p>If
 4900        * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
 4901        * is {@code true}, then
 4902        * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
 4903        * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
 4904        * are also always {@code true}.
 4905        *
 4906        * @param   codePoint a supplementary character (Unicode code point)
 4907        * @return  the trailing surrogate code unit used to represent the
 4908        *          character in the UTF-16 encoding
 4909        * @since   1.7
 4910        */
 4911       public static char lowSurrogate(int codePoint) {
 4912           return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
 4913       }
 4914   
 4915       /**
 4916        * Converts the specified character (Unicode code point) to its
 4917        * UTF-16 representation. If the specified code point is a BMP
 4918        * (Basic Multilingual Plane or Plane 0) value, the same value is
 4919        * stored in {@code dst[dstIndex]}, and 1 is returned. If the
 4920        * specified code point is a supplementary character, its
 4921        * surrogate values are stored in {@code dst[dstIndex]}
 4922        * (high-surrogate) and {@code dst[dstIndex+1]}
 4923        * (low-surrogate), and 2 is returned.
 4924        *
 4925        * @param  codePoint the character (Unicode code point) to be converted.
 4926        * @param  dst an array of {@code char} in which the
 4927        * {@code codePoint}'s UTF-16 value is stored.
 4928        * @param dstIndex the start index into the {@code dst}
 4929        * array where the converted value is stored.
 4930        * @return 1 if the code point is a BMP code point, 2 if the
 4931        * code point is a supplementary code point.
 4932        * @exception IllegalArgumentException if the specified
 4933        * {@code codePoint} is not a valid Unicode code point.
 4934        * @exception NullPointerException if the specified {@code dst} is null.
 4935        * @exception IndexOutOfBoundsException if {@code dstIndex}
 4936        * is negative or not less than {@code dst.length}, or if
 4937        * {@code dst} at {@code dstIndex} doesn't have enough
 4938        * array element(s) to store the resulting {@code char}
 4939        * value(s). (If {@code dstIndex} is equal to
 4940        * {@code dst.length-1} and the specified
 4941        * {@code codePoint} is a supplementary character, the
 4942        * high-surrogate value is not stored in
 4943        * {@code dst[dstIndex]}.)
 4944        * @since  1.5
 4945        */
 4946       public static int toChars(int codePoint, char[] dst, int dstIndex) {
 4947           if (isBmpCodePoint(codePoint)) {
 4948               dst[dstIndex] = (char) codePoint;
 4949               return 1;
 4950           } else if (isValidCodePoint(codePoint)) {
 4951               toSurrogates(codePoint, dst, dstIndex);
 4952               return 2;
 4953           } else {
 4954               throw new IllegalArgumentException();
 4955           }
 4956       }
 4957   
 4958       /**
 4959        * Converts the specified character (Unicode code point) to its
 4960        * UTF-16 representation stored in a {@code char} array. If
 4961        * the specified code point is a BMP (Basic Multilingual Plane or
 4962        * Plane 0) value, the resulting {@code char} array has
 4963        * the same value as {@code codePoint}. If the specified code
 4964        * point is a supplementary code point, the resulting
 4965        * {@code char} array has the corresponding surrogate pair.
 4966        *
 4967        * @param  codePoint a Unicode code point
 4968        * @return a {@code char} array having
 4969        *         {@code codePoint}'s UTF-16 representation.
 4970        * @exception IllegalArgumentException if the specified
 4971        * {@code codePoint} is not a valid Unicode code point.
 4972        * @since  1.5
 4973        */
 4974       public static char[] toChars(int codePoint) {
 4975           if (isBmpCodePoint(codePoint)) {
 4976               return new char[] { (char) codePoint };
 4977           } else if (isValidCodePoint(codePoint)) {
 4978               char[] result = new char[2];
 4979               toSurrogates(codePoint, result, 0);
 4980               return result;
 4981           } else {
 4982               throw new IllegalArgumentException();
 4983           }
 4984       }
 4985   
 4986       static void toSurrogates(int codePoint, char[] dst, int index) {
 4987           // We write elements "backwards" to guarantee all-or-nothing
 4988           dst[index+1] = lowSurrogate(codePoint);
 4989           dst[index] = highSurrogate(codePoint);
 4990       }
 4991   
 4992       /**
 4993        * Returns the number of Unicode code points in the text range of
 4994        * the specified char sequence. The text range begins at the
 4995        * specified {@code beginIndex} and extends to the
 4996        * {@code char} at index {@code endIndex - 1}. Thus the
 4997        * length (in {@code char}s) of the text range is
 4998        * {@code endIndex-beginIndex}. Unpaired surrogates within
 4999        * the text range count as one code point each.
 5000        *
 5001        * @param seq the char sequence
 5002        * @param beginIndex the index to the first {@code char} of
 5003        * the text range.
 5004        * @param endIndex the index after the last {@code char} of
 5005        * the text range.
 5006        * @return the number of Unicode code points in the specified text
 5007        * range
 5008        * @exception NullPointerException if {@code seq} is null.
 5009        * @exception IndexOutOfBoundsException if the
 5010        * {@code beginIndex} is negative, or {@code endIndex}
 5011        * is larger than the length of the given sequence, or
 5012        * {@code beginIndex} is larger than {@code endIndex}.
 5013        * @since  1.5
 5014        */
 5015       public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
 5016           int length = seq.length();
 5017           if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
 5018               throw new IndexOutOfBoundsException();
 5019           }
 5020           int n = endIndex - beginIndex;
 5021           for (int i = beginIndex; i < endIndex; ) {
 5022               if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
 5023                   isLowSurrogate(seq.charAt(i))) {
 5024                   n--;
 5025                   i++;
 5026               }
 5027           }
 5028           return n;
 5029       }
 5030   
 5031       /**
 5032        * Returns the number of Unicode code points in a subarray of the
 5033        * {@code char} array argument. The {@code offset}
 5034        * argument is the index of the first {@code char} of the
 5035        * subarray and the {@code count} argument specifies the
 5036        * length of the subarray in {@code char}s. Unpaired
 5037        * surrogates within the subarray count as one code point each.
 5038        *
 5039        * @param a the {@code char} array
 5040        * @param offset the index of the first {@code char} in the
 5041        * given {@code char} array
 5042        * @param count the length of the subarray in {@code char}s
 5043        * @return the number of Unicode code points in the specified subarray
 5044        * @exception NullPointerException if {@code a} is null.
 5045        * @exception IndexOutOfBoundsException if {@code offset} or
 5046        * {@code count} is negative, or if {@code offset +
 5047        * count} is larger than the length of the given array.
 5048        * @since  1.5
 5049        */
 5050       public static int codePointCount(char[] a, int offset, int count) {
 5051           if (count > a.length - offset || offset < 0 || count < 0) {
 5052               throw new IndexOutOfBoundsException();
 5053           }
 5054           return codePointCountImpl(a, offset, count);
 5055       }
 5056   
 5057       static int codePointCountImpl(char[] a, int offset, int count) {
 5058           int endIndex = offset + count;
 5059           int n = count;
 5060           for (int i = offset; i < endIndex; ) {
 5061               if (isHighSurrogate(a[i++]) && i < endIndex &&
 5062                   isLowSurrogate(a[i])) {
 5063                   n--;
 5064                   i++;
 5065               }
 5066           }
 5067           return n;
 5068       }
 5069   
 5070       /**
 5071        * Returns the index within the given char sequence that is offset
 5072        * from the given {@code index} by {@code codePointOffset}
 5073        * code points. Unpaired surrogates within the text range given by
 5074        * {@code index} and {@code codePointOffset} count as
 5075        * one code point each.
 5076        *
 5077        * @param seq the char sequence
 5078        * @param index the index to be offset
 5079        * @param codePointOffset the offset in code points
 5080        * @return the index within the char sequence
 5081        * @exception NullPointerException if {@code seq} is null.
 5082        * @exception IndexOutOfBoundsException if {@code index}
 5083        *   is negative or larger than the length of the char sequence,
 5084        *   or if {@code codePointOffset} is positive and the
 5085        *   subsequence starting with {@code index} has fewer than
 5086        *   {@code codePointOffset} code points, or if
 5087        *   {@code codePointOffset} is negative and the subsequence
 5088        *   before {@code index} has fewer than the absolute value
 5089        *   of {@code codePointOffset} code points.
 5090        * @since 1.5
 5091        */
 5092       public static int offsetByCodePoints(CharSequence seq, int index,
 5093                                            int codePointOffset) {
 5094           int length = seq.length();
 5095           if (index < 0 || index > length) {
 5096               throw new IndexOutOfBoundsException();
 5097           }
 5098   
 5099           int x = index;
 5100           if (codePointOffset >= 0) {
 5101               int i;
 5102               for (i = 0; x < length && i < codePointOffset; i++) {
 5103                   if (isHighSurrogate(seq.charAt(x++)) && x < length &&
 5104                       isLowSurrogate(seq.charAt(x))) {
 5105                       x++;
 5106                   }
 5107               }
 5108               if (i < codePointOffset) {
 5109                   throw new IndexOutOfBoundsException();
 5110               }
 5111           } else {
 5112               int i;
 5113               for (i = codePointOffset; x > 0 && i < 0; i++) {
 5114                   if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
 5115                       isHighSurrogate(seq.charAt(x-1))) {
 5116                       x--;
 5117                   }
 5118               }
 5119               if (i < 0) {
 5120                   throw new IndexOutOfBoundsException();
 5121               }
 5122           }
 5123           return x;
 5124       }
 5125   
 5126       /**
 5127        * Returns the index within the given {@code char} subarray
 5128        * that is offset from the given {@code index} by
 5129        * {@code codePointOffset} code points. The
 5130        * {@code start} and {@code count} arguments specify a
 5131        * subarray of the {@code char} array. Unpaired surrogates
 5132        * within the text range given by {@code index} and
 5133        * {@code codePointOffset} count as one code point each.
 5134        *
 5135        * @param a the {@code char} array
 5136        * @param start the index of the first {@code char} of the
 5137        * subarray
 5138        * @param count the length of the subarray in {@code char}s
 5139        * @param index the index to be offset
 5140        * @param codePointOffset the offset in code points
 5141        * @return the index within the subarray
 5142        * @exception NullPointerException if {@code a} is null.
 5143        * @exception IndexOutOfBoundsException
 5144        *   if {@code start} or {@code count} is negative,
 5145        *   or if {@code start + count} is larger than the length of
 5146        *   the given array,
 5147        *   or if {@code index} is less than {@code start} or
 5148        *   larger than {@code start + count},
 5149        *   or if {@code codePointOffset} is positive and the text range
 5150        *   starting with {@code index} and ending with {@code start + count - 1}
 5151        *   has fewer than {@code codePointOffset} code
 5152        *   points,
 5153        *   or if {@code codePointOffset} is negative and the text range
 5154        *   starting with {@code start} and ending with {@code index - 1}
 5155        *   has fewer than the absolute value of
 5156        *   {@code codePointOffset} code points.
 5157        * @since 1.5
 5158        */
 5159       public static int offsetByCodePoints(char[] a, int start, int count,
 5160                                            int index, int codePointOffset) {
 5161           if (count > a.length-start || start < 0 || count < 0
 5162               || index < start || index > start+count) {
 5163               throw new IndexOutOfBoundsException();
 5164           }
 5165           return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
 5166       }
 5167   
 5168       static int offsetByCodePointsImpl(char[]a, int start, int count,
 5169                                         int index, int codePointOffset) {
 5170           int x = index;
 5171           if (codePointOffset >= 0) {
 5172               int limit = start + count;
 5173               int i;
 5174               for (i = 0; x < limit && i < codePointOffset; i++) {
 5175                   if (isHighSurrogate(a[x++]) && x < limit &&
 5176                       isLowSurrogate(a[x])) {
 5177                       x++;
 5178                   }
 5179               }
 5180               if (i < codePointOffset) {
 5181                   throw new IndexOutOfBoundsException();
 5182               }
 5183           } else {
 5184               int i;
 5185               for (i = codePointOffset; x > start && i < 0; i++) {
 5186                   if (isLowSurrogate(a[--x]) && x > start &&
 5187                       isHighSurrogate(a[x-1])) {
 5188                       x--;
 5189                   }
 5190               }
 5191               if (i < 0) {
 5192                   throw new IndexOutOfBoundsException();
 5193               }
 5194           }
 5195           return x;
 5196       }
 5197   
 5198       /**
 5199        * Determines if the specified character is a lowercase character.
 5200        * <p>
 5201        * A character is lowercase if its general category type, provided
 5202        * by {@code Character.getType(ch)}, is
 5203        * {@code LOWERCASE_LETTER}, or it has contributory property
 5204        * Other_Lowercase as defined by the Unicode Standard.
 5205        * <p>
 5206        * The following are examples of lowercase characters:
 5207        * <p><blockquote><pre>
 5208        * a b c d e f g h i j k l m n o p q r s t u v w x y z
 5209        * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
 5210        * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
 5211        * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
 5212        * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
 5213        * </pre></blockquote>
 5214        * <p> Many other Unicode characters are lowercase too.
 5215        *
 5216        * <p><b>Note:</b> This method cannot handle <a
 5217        * href="#supplementary"> supplementary characters</a>. To support
 5218        * all Unicode characters, including supplementary characters, use
 5219        * the {@link #isLowerCase(int)} method.
 5220        *
 5221        * @param   ch   the character to be tested.
 5222        * @return  {@code true} if the character is lowercase;
 5223        *          {@code false} otherwise.
 5224        * @see     Character#isLowerCase(char)
 5225        * @see     Character#isTitleCase(char)
 5226        * @see     Character#toLowerCase(char)
 5227        * @see     Character#getType(char)
 5228        */
 5229       public static boolean isLowerCase(char ch) {
 5230           return isLowerCase((int)ch);
 5231       }
 5232   
 5233       /**
 5234        * Determines if the specified character (Unicode code point) is a
 5235        * lowercase character.
 5236        * <p>
 5237        * A character is lowercase if its general category type, provided
 5238        * by {@link Character#getType getType(codePoint)}, is
 5239        * {@code LOWERCASE_LETTER}, or it has contributory property
 5240        * Other_Lowercase as defined by the Unicode Standard.
 5241        * <p>
 5242        * The following are examples of lowercase characters:
 5243        * <p><blockquote><pre>
 5244        * a b c d e f g h i j k l m n o p q r s t u v w x y z
 5245        * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
 5246        * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
 5247        * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
 5248        * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
 5249        * </pre></blockquote>
 5250        * <p> Many other Unicode characters are lowercase too.
 5251        *
 5252        * @param   codePoint the character (Unicode code point) to be tested.
 5253        * @return  {@code true} if the character is lowercase;
 5254        *          {@code false} otherwise.
 5255        * @see     Character#isUpperCase(int)
 5256        * @see     Character#isTitleCase(int)
 5257        * @see     Character#toLowerCase(int)
 5258        * @see     Character#getType(int)
 5259        * @since   1.5
 5260        */
 5261       public static boolean isLowerCase(int codePoint) {
 5262           return getType(codePoint) == Character.LOWERCASE_LETTER ||
 5263                  CharacterData.of(codePoint).isOtherLowercase(codePoint);
 5264       }
 5265   
 5266       /**
 5267        * Determines if the specified character is an uppercase character.
 5268        * <p>
 5269        * A character is uppercase if its general category type, provided by
 5270        * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
 5271        * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
 5272        * <p>
 5273        * The following are examples of uppercase characters:
 5274        * <p><blockquote><pre>
 5275        * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
 5276        * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
 5277        * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
 5278        * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
 5279        * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
 5280        * </pre></blockquote>
 5281        * <p> Many other Unicode characters are uppercase too.
 5282        *
 5283        * <p><b>Note:</b> This method cannot handle <a
 5284        * href="#supplementary"> supplementary characters</a>. To support
 5285        * all Unicode characters, including supplementary characters, use
 5286        * the {@link #isUpperCase(int)} method.
 5287        *
 5288        * @param   ch   the character to be tested.
 5289        * @return  {@code true} if the character is uppercase;
 5290        *          {@code false} otherwise.
 5291        * @see     Character#isLowerCase(char)
 5292        * @see     Character#isTitleCase(char)
 5293        * @see     Character#toUpperCase(char)
 5294        * @see     Character#getType(char)
 5295        * @since   1.0
 5296        */
 5297       public static boolean isUpperCase(char ch) {
 5298           return isUpperCase((int)ch);
 5299       }
 5300   
 5301       /**
 5302        * Determines if the specified character (Unicode code point) is an uppercase character.
 5303        * <p>
 5304        * A character is uppercase if its general category type, provided by
 5305        * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
 5306        * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
 5307        * <p>
 5308        * The following are examples of uppercase characters:
 5309        * <p><blockquote><pre>
 5310        * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
 5311        * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
 5312        * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
 5313        * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
 5314        * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
 5315        * </pre></blockquote>
 5316        * <p> Many other Unicode characters are uppercase too.
 5317        *
 5318        * @param   codePoint the character (Unicode code point) to be tested.
 5319        * @return  {@code true} if the character is uppercase;
 5320        *          {@code false} otherwise.
 5321        * @see     Character#isLowerCase(int)
 5322        * @see     Character#isTitleCase(int)
 5323        * @see     Character#toUpperCase(int)
 5324        * @see     Character#getType(int)
 5325        * @since   1.5
 5326        */
 5327       public static boolean isUpperCase(int codePoint) {
 5328           return getType(codePoint) == Character.UPPERCASE_LETTER ||
 5329                  CharacterData.of(codePoint).isOtherUppercase(codePoint);
 5330       }
 5331   
 5332       /**
 5333        * Determines if the specified character is a titlecase character.
 5334        * <p>
 5335        * A character is a titlecase character if its general
 5336        * category type, provided by {@code Character.getType(ch)},
 5337        * is {@code TITLECASE_LETTER}.
 5338        * <p>
 5339        * Some characters look like pairs of Latin letters. For example, there
 5340        * is an uppercase letter that looks like "LJ" and has a corresponding
 5341        * lowercase letter that looks like "lj". A third form, which looks like "Lj",
 5342        * is the appropriate form to use when rendering a word in lowercase
 5343        * with initial capitals, as for a book title.
 5344        * <p>
 5345        * These are some of the Unicode characters for which this method returns
 5346        * {@code true}:
 5347        * <ul>
 5348        * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
 5349        * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
 5350        * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
 5351        * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
 5352        * </ul>
 5353        * <p> Many other Unicode characters are titlecase too.
 5354        *
 5355        * <p><b>Note:</b> This method cannot handle <a
 5356        * href="#supplementary"> supplementary characters</a>. To support
 5357        * all Unicode characters, including supplementary characters, use
 5358        * the {@link #isTitleCase(int)} method.
 5359        *
 5360        * @param   ch   the character to be tested.
 5361        * @return  {@code true} if the character is titlecase;
 5362        *          {@code false} otherwise.
 5363        * @see     Character#isLowerCase(char)
 5364        * @see     Character#isUpperCase(char)
 5365        * @see     Character#toTitleCase(char)
 5366        * @see     Character#getType(char)
 5367        * @since   1.0.2
 5368        */
 5369       public static boolean isTitleCase(char ch) {
 5370           return isTitleCase((int)ch);
 5371       }
 5372   
 5373       /**
 5374        * Determines if the specified character (Unicode code point) is a titlecase character.
 5375        * <p>
 5376        * A character is a titlecase character if its general
 5377        * category type, provided by {@link Character#getType(int) getType(codePoint)},
 5378        * is {@code TITLECASE_LETTER}.
 5379        * <p>
 5380        * Some characters look like pairs of Latin letters. For example, there
 5381        * is an uppercase letter that looks like "LJ" and has a corresponding
 5382        * lowercase letter that looks like "lj". A third form, which looks like "Lj",
 5383        * is the appropriate form to use when rendering a word in lowercase
 5384        * with initial capitals, as for a book title.
 5385        * <p>
 5386        * These are some of the Unicode characters for which this method returns
 5387        * {@code true}:
 5388        * <ul>
 5389        * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
 5390        * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
 5391        * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
 5392        * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
 5393        * </ul>
 5394        * <p> Many other Unicode characters are titlecase too.
 5395        *
 5396        * @param   codePoint the character (Unicode code point) to be tested.
 5397        * @return  {@code true} if the character is titlecase;
 5398        *          {@code false} otherwise.
 5399        * @see     Character#isLowerCase(int)
 5400        * @see     Character#isUpperCase(int)
 5401        * @see     Character#toTitleCase(int)
 5402        * @see     Character#getType(int)
 5403        * @since   1.5
 5404        */
 5405       public static boolean isTitleCase(int codePoint) {
 5406           return getType(codePoint) == Character.TITLECASE_LETTER;
 5407       }
 5408   
 5409       /**
 5410        * Determines if the specified character is a digit.
 5411        * <p>
 5412        * A character is a digit if its general category type, provided
 5413        * by {@code Character.getType(ch)}, is
 5414        * {@code DECIMAL_DIGIT_NUMBER}.
 5415        * <p>
 5416        * Some Unicode character ranges that contain digits:
 5417        * <ul>
 5418        * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
 5419        *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
 5420        * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
 5421        *     Arabic-Indic digits
 5422        * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
 5423        *     Extended Arabic-Indic digits
 5424        * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
 5425        *     Devanagari digits
 5426        * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
 5427        *     Fullwidth digits
 5428        * </ul>
 5429        *
 5430        * Many other character ranges contain digits as well.
 5431        *
 5432        * <p><b>Note:</b> This method cannot handle <a
 5433        * href="#supplementary"> supplementary characters</a>. To support
 5434        * all Unicode characters, including supplementary characters, use
 5435        * the {@link #isDigit(int)} method.
 5436        *
 5437        * @param   ch   the character to be tested.
 5438        * @return  {@code true} if the character is a digit;
 5439        *          {@code false} otherwise.
 5440        * @see     Character#digit(char, int)
 5441        * @see     Character#forDigit(int, int)
 5442        * @see     Character#getType(char)
 5443        */
 5444       public static boolean isDigit(char ch) {
 5445           return isDigit((int)ch);
 5446       }
 5447   
 5448       /**
 5449        * Determines if the specified character (Unicode code point) is a digit.
 5450        * <p>
 5451        * A character is a digit if its general category type, provided
 5452        * by {@link Character#getType(int) getType(codePoint)}, is
 5453        * {@code DECIMAL_DIGIT_NUMBER}.
 5454        * <p>
 5455        * Some Unicode character ranges that contain digits:
 5456        * <ul>
 5457        * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
 5458        *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
 5459        * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
 5460        *     Arabic-Indic digits
 5461        * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
 5462        *     Extended Arabic-Indic digits
 5463        * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
 5464        *     Devanagari digits
 5465        * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
 5466        *     Fullwidth digits
 5467        * </ul>
 5468        *
 5469        * Many other character ranges contain digits as well.
 5470        *
 5471        * @param   codePoint the character (Unicode code point) to be tested.
 5472        * @return  {@code true} if the character is a digit;
 5473        *          {@code false} otherwise.
 5474        * @see     Character#forDigit(int, int)
 5475        * @see     Character#getType(int)
 5476        * @since   1.5
 5477        */
 5478       public static boolean isDigit(int codePoint) {
 5479           return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
 5480       }
 5481   
 5482       /**
 5483        * Determines if a character is defined in Unicode.
 5484        * <p>
 5485        * A character is defined if at least one of the following is true:
 5486        * <ul>
 5487        * <li>It has an entry in the UnicodeData file.
 5488        * <li>It has a value in a range defined by the UnicodeData file.
 5489        * </ul>
 5490        *
 5491        * <p><b>Note:</b> This method cannot handle <a
 5492        * href="#supplementary"> supplementary characters</a>. To support
 5493        * all Unicode characters, including supplementary characters, use
 5494        * the {@link #isDefined(int)} method.
 5495        *
 5496        * @param   ch   the character to be tested
 5497        * @return  {@code true} if the character has a defined meaning
 5498        *          in Unicode; {@code false} otherwise.
 5499        * @see     Character#isDigit(char)
 5500        * @see     Character#isLetter(char)
 5501        * @see     Character#isLetterOrDigit(char)
 5502        * @see     Character#isLowerCase(char)
 5503        * @see     Character#isTitleCase(char)
 5504        * @see     Character#isUpperCase(char)
 5505        * @since   1.0.2
 5506        */
 5507       public static boolean isDefined(char ch) {
 5508           return isDefined((int)ch);
 5509       }
 5510   
 5511       /**
 5512        * Determines if a character (Unicode code point) is defined in Unicode.
 5513        * <p>
 5514        * A character is defined if at least one of the following is true:
 5515        * <ul>
 5516        * <li>It has an entry in the UnicodeData file.
 5517        * <li>It has a value in a range defined by the UnicodeData file.
 5518        * </ul>
 5519        *
 5520        * @param   codePoint the character (Unicode code point) to be tested.
 5521        * @return  {@code true} if the character has a defined meaning
 5522        *          in Unicode; {@code false} otherwise.
 5523        * @see     Character#isDigit(int)
 5524        * @see     Character#isLetter(int)
 5525        * @see     Character#isLetterOrDigit(int)
 5526        * @see     Character#isLowerCase(int)
 5527        * @see     Character#isTitleCase(int)
 5528        * @see     Character#isUpperCase(int)
 5529        * @since   1.5
 5530        */
 5531       public static boolean isDefined(int codePoint) {
 5532           return getType(codePoint) != Character.UNASSIGNED;
 5533       }
 5534   
 5535       /**
 5536        * Determines if the specified character is a letter.
 5537        * <p>
 5538        * A character is considered to be a letter if its general
 5539        * category type, provided by {@code Character.getType(ch)},
 5540        * is any of the following:
 5541        * <ul>
 5542        * <li> {@code UPPERCASE_LETTER}
 5543        * <li> {@code LOWERCASE_LETTER}
 5544        * <li> {@code TITLECASE_LETTER}
 5545        * <li> {@code MODIFIER_LETTER}
 5546        * <li> {@code OTHER_LETTER}
 5547        * </ul>
 5548        *
 5549        * Not all letters have case. Many characters are
 5550        * letters but are neither uppercase nor lowercase nor titlecase.
 5551        *
 5552        * <p><b>Note:</b> This method cannot handle <a
 5553        * href="#supplementary"> supplementary characters</a>. To support
 5554        * all Unicode characters, including supplementary characters, use
 5555        * the {@link #isLetter(int)} method.
 5556        *
 5557        * @param   ch   the character to be tested.
 5558        * @return  {@code true} if the character is a letter;
 5559        *          {@code false} otherwise.
 5560        * @see     Character#isDigit(char)
 5561        * @see     Character#isJavaIdentifierStart(char)
 5562        * @see     Character#isJavaLetter(char)
 5563        * @see     Character#isJavaLetterOrDigit(char)
 5564        * @see     Character#isLetterOrDigit(char)
 5565        * @see     Character#isLowerCase(char)
 5566        * @see     Character#isTitleCase(char)
 5567        * @see     Character#isUnicodeIdentifierStart(char)
 5568        * @see     Character#isUpperCase(char)
 5569        */
 5570       public static boolean isLetter(char ch) {
 5571           return isLetter((int)ch);
 5572       }
 5573   
 5574       /**
 5575        * Determines if the specified character (Unicode code point) is a letter.
 5576        * <p>
 5577        * A character is considered to be a letter if its general
 5578        * category type, provided by {@link Character#getType(int) getType(codePoint)},
 5579        * is any of the following:
 5580        * <ul>
 5581        * <li> {@code UPPERCASE_LETTER}
 5582        * <li> {@code LOWERCASE_LETTER}
 5583        * <li> {@code TITLECASE_LETTER}
 5584        * <li> {@code MODIFIER_LETTER}
 5585        * <li> {@code OTHER_LETTER}
 5586        * </ul>
 5587        *
 5588        * Not all letters have case. Many characters are
 5589        * letters but are neither uppercase nor lowercase nor titlecase.
 5590        *
 5591        * @param   codePoint the character (Unicode code point) to be tested.
 5592        * @return  {@code true} if the character is a letter;
 5593        *          {@code false} otherwise.
 5594        * @see     Character#isDigit(int)
 5595        * @see     Character#isJavaIdentifierStart(int)
 5596        * @see     Character#isLetterOrDigit(int)
 5597        * @see     Character#isLowerCase(int)
 5598        * @see     Character#isTitleCase(int)
 5599        * @see     Character#isUnicodeIdentifierStart(int)
 5600        * @see     Character#isUpperCase(int)
 5601        * @since   1.5
 5602        */
 5603       public static boolean isLetter(int codePoint) {
 5604           return ((((1 << Character.UPPERCASE_LETTER) |
 5605               (1 << Character.LOWERCASE_LETTER) |
 5606               (1 << Character.TITLECASE_LETTER) |
 5607               (1 << Character.MODIFIER_LETTER) |
 5608               (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
 5609               != 0;
 5610       }
 5611   
 5612       /**
 5613        * Determines if the specified character is a letter or digit.
 5614        * <p>
 5615        * A character is considered to be a letter or digit if either
 5616        * {@code Character.isLetter(char ch)} or
 5617        * {@code Character.isDigit(char ch)} returns
 5618        * {@code true} for the character.
 5619        *
 5620        * <p><b>Note:</b> This method cannot handle <a
 5621        * href="#supplementary"> supplementary characters</a>. To support
 5622        * all Unicode characters, including supplementary characters, use
 5623        * the {@link #isLetterOrDigit(int)} method.
 5624        *
 5625        * @param   ch   the character to be tested.
 5626        * @return  {@code true} if the character is a letter or digit;
 5627        *          {@code false} otherwise.
 5628        * @see     Character#isDigit(char)
 5629        * @see     Character#isJavaIdentifierPart(char)
 5630        * @see     Character#isJavaLetter(char)
 5631        * @see     Character#isJavaLetterOrDigit(char)
 5632        * @see     Character#isLetter(char)
 5633        * @see     Character#isUnicodeIdentifierPart(char)
 5634        * @since   1.0.2
 5635        */
 5636       public static boolean isLetterOrDigit(char ch) {
 5637           return isLetterOrDigit((int)ch);
 5638       }
 5639   
 5640       /**
 5641        * Determines if the specified character (Unicode code point) is a letter or digit.
 5642        * <p>
 5643        * A character is considered to be a letter or digit if either
 5644        * {@link #isLetter(int) isLetter(codePoint)} or
 5645        * {@link #isDigit(int) isDigit(codePoint)} returns
 5646        * {@code true} for the character.
 5647        *
 5648        * @param   codePoint the character (Unicode code point) to be tested.
 5649        * @return  {@code true} if the character is a letter or digit;
 5650        *          {@code false} otherwise.
 5651        * @see     Character#isDigit(int)
 5652        * @see     Character#isJavaIdentifierPart(int)
 5653        * @see     Character#isLetter(int)
 5654        * @see     Character#isUnicodeIdentifierPart(int)
 5655        * @since   1.5
 5656        */
 5657       public static boolean isLetterOrDigit(int codePoint) {
 5658           return ((((1 << Character.UPPERCASE_LETTER) |
 5659               (1 << Character.LOWERCASE_LETTER) |
 5660               (1 << Character.TITLECASE_LETTER) |
 5661               (1 << Character.MODIFIER_LETTER) |
 5662               (1 << Character.OTHER_LETTER) |
 5663               (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
 5664               != 0;
 5665       }
 5666   
 5667       /**
 5668        * Determines if the specified character is permissible as the first
 5669        * character in a Java identifier.
 5670        * <p>
 5671        * A character may start a Java identifier if and only if
 5672        * one of the following is true:
 5673        * <ul>
 5674        * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
 5675        * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
 5676        * <li> {@code ch} is a currency symbol (such as {@code '$'})
 5677        * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
 5678        * </ul>
 5679        *
 5680        * @param   ch the character to be tested.
 5681        * @return  {@code true} if the character may start a Java
 5682        *          identifier; {@code false} otherwise.
 5683        * @see     Character#isJavaLetterOrDigit(char)
 5684        * @see     Character#isJavaIdentifierStart(char)
 5685        * @see     Character#isJavaIdentifierPart(char)
 5686        * @see     Character#isLetter(char)
 5687        * @see     Character#isLetterOrDigit(char)
 5688        * @see     Character#isUnicodeIdentifierStart(char)
 5689        * @since   1.0.2
 5690        * @deprecated Replaced by isJavaIdentifierStart(char).
 5691        */
 5692       @Deprecated
 5693       public static boolean isJavaLetter(char ch) {
 5694           return isJavaIdentifierStart(ch);
 5695       }
 5696   
 5697       /**
 5698        * Determines if the specified character may be part of a Java
 5699        * identifier as other than the first character.
 5700        * <p>
 5701        * A character may be part of a Java identifier if and only if any
 5702        * of the following are true:
 5703        * <ul>
 5704        * <li>  it is a letter
 5705        * <li>  it is a currency symbol (such as {@code '$'})
 5706        * <li>  it is a connecting punctuation character (such as {@code '_'})
 5707        * <li>  it is a digit
 5708        * <li>  it is a numeric letter (such as a Roman numeral character)
 5709        * <li>  it is a combining mark
 5710        * <li>  it is a non-spacing mark
 5711        * <li> {@code isIdentifierIgnorable} returns
 5712        * {@code true} for the character.
 5713        * </ul>
 5714        *
 5715        * @param   ch the character to be tested.
 5716        * @return  {@code true} if the character may be part of a
 5717        *          Java identifier; {@code false} otherwise.
 5718        * @see     Character#isJavaLetter(char)
 5719        * @see     Character#isJavaIdentifierStart(char)
 5720        * @see     Character#isJavaIdentifierPart(char)
 5721        * @see     Character#isLetter(char)
 5722        * @see     Character#isLetterOrDigit(char)
 5723        * @see     Character#isUnicodeIdentifierPart(char)
 5724        * @see     Character#isIdentifierIgnorable(char)
 5725        * @since   1.0.2
 5726        * @deprecated Replaced by isJavaIdentifierPart(char).
 5727        */
 5728       @Deprecated
 5729       public static boolean isJavaLetterOrDigit(char ch) {
 5730           return isJavaIdentifierPart(ch);
 5731       }
 5732   
 5733       /**
 5734        * Determines if the specified character (Unicode code point) is alphabetic.
 5735        * <p>
 5736        * A character is considered to be alphabetic if its general category type,
 5737        * provided by {@link Character#getType(int) getType(codePoint)}, is any of
 5738        * the following:
 5739        * <ul>
 5740        * <li> <code>UPPERCASE_LETTER</code>
 5741        * <li> <code>LOWERCASE_LETTER</code>
 5742        * <li> <code>TITLECASE_LETTER</code>
 5743        * <li> <code>MODIFIER_LETTER</code>
 5744        * <li> <code>OTHER_LETTER</code>
 5745        * <li> <code>LETTER_NUMBER</code>
 5746        * </ul>
 5747        * or it has contributory property Other_Alphabetic as defined by the
 5748        * Unicode Standard.
 5749        *
 5750        * @param   codePoint the character (Unicode code point) to be tested.
 5751        * @return  <code>true</code> if the character is a Unicode alphabetic
 5752        *          character, <code>false</code> otherwise.
 5753        * @since   1.7
 5754        */
 5755       public static boolean isAlphabetic(int codePoint) {
 5756           return (((((1 << Character.UPPERCASE_LETTER) |
 5757               (1 << Character.LOWERCASE_LETTER) |
 5758               (1 << Character.TITLECASE_LETTER) |
 5759               (1 << Character.MODIFIER_LETTER) |
 5760               (1 << Character.OTHER_LETTER) |
 5761               (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
 5762               CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
 5763       }
 5764   
 5765       /**
 5766        * Determines if the specified character (Unicode code point) is a CJKV
 5767        * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
 5768        * the Unicode Standard.
 5769        *
 5770        * @param   codePoint the character (Unicode code point) to be tested.
 5771        * @return  <code>true</code> if the character is a Unicode ideograph
 5772        *          character, <code>false</code> otherwise.
 5773        * @since   1.7
 5774        */
 5775       public static boolean isIdeographic(int codePoint) {
 5776           return CharacterData.of(codePoint).isIdeographic(codePoint);
 5777       }
 5778   
 5779       /**
 5780        * Determines if the specified character is
 5781        * permissible as the first character in a Java identifier.
 5782        * <p>
 5783        * A character may start a Java identifier if and only if
 5784        * one of the following conditions is true:
 5785        * <ul>
 5786        * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
 5787        * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
 5788        * <li> {@code ch} is a currency symbol (such as {@code '$'})
 5789        * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
 5790        * </ul>
 5791        *
 5792        * <p><b>Note:</b> This method cannot handle <a
 5793        * href="#supplementary"> supplementary characters</a>. To support
 5794        * all Unicode characters, including supplementary characters, use
 5795        * the {@link #isJavaIdentifierStart(int)} method.
 5796        *
 5797        * @param   ch the character to be tested.
 5798        * @return  {@code true} if the character may start a Java identifier;
 5799        *          {@code false} otherwise.
 5800        * @see     Character#isJavaIdentifierPart(char)
 5801        * @see     Character#isLetter(char)
 5802        * @see     Character#isUnicodeIdentifierStart(char)
 5803        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 5804        * @since   1.1
 5805        */
 5806       public static boolean isJavaIdentifierStart(char ch) {
 5807           return isJavaIdentifierStart((int)ch);
 5808       }
 5809   
 5810       /**
 5811        * Determines if the character (Unicode code point) is
 5812        * permissible as the first character in a Java identifier.
 5813        * <p>
 5814        * A character may start a Java identifier if and only if
 5815        * one of the following conditions is true:
 5816        * <ul>
 5817        * <li> {@link #isLetter(int) isLetter(codePoint)}
 5818        *      returns {@code true}
 5819        * <li> {@link #getType(int) getType(codePoint)}
 5820        *      returns {@code LETTER_NUMBER}
 5821        * <li> the referenced character is a currency symbol (such as {@code '$'})
 5822        * <li> the referenced character is a connecting punctuation character
 5823        *      (such as {@code '_'}).
 5824        * </ul>
 5825        *
 5826        * @param   codePoint the character (Unicode code point) to be tested.
 5827        * @return  {@code true} if the character may start a Java identifier;
 5828        *          {@code false} otherwise.
 5829        * @see     Character#isJavaIdentifierPart(int)
 5830        * @see     Character#isLetter(int)
 5831        * @see     Character#isUnicodeIdentifierStart(int)
 5832        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 5833        * @since   1.5
 5834        */
 5835       public static boolean isJavaIdentifierStart(int codePoint) {
 5836           return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
 5837       }
 5838   
 5839       /**
 5840        * Determines if the specified character may be part of a Java
 5841        * identifier as other than the first character.
 5842        * <p>
 5843        * A character may be part of a Java identifier if any of the following
 5844        * are true:
 5845        * <ul>
 5846        * <li>  it is a letter
 5847        * <li>  it is a currency symbol (such as {@code '$'})
 5848        * <li>  it is a connecting punctuation character (such as {@code '_'})
 5849        * <li>  it is a digit
 5850        * <li>  it is a numeric letter (such as a Roman numeral character)
 5851        * <li>  it is a combining mark
 5852        * <li>  it is a non-spacing mark
 5853        * <li> {@code isIdentifierIgnorable} returns
 5854        * {@code true} for the character
 5855        * </ul>
 5856        *
 5857        * <p><b>Note:</b> This method cannot handle <a
 5858        * href="#supplementary"> supplementary characters</a>. To support
 5859        * all Unicode characters, including supplementary characters, use
 5860        * the {@link #isJavaIdentifierPart(int)} method.
 5861        *
 5862        * @param   ch      the character to be tested.
 5863        * @return {@code true} if the character may be part of a
 5864        *          Java identifier; {@code false} otherwise.
 5865        * @see     Character#isIdentifierIgnorable(char)
 5866        * @see     Character#isJavaIdentifierStart(char)
 5867        * @see     Character#isLetterOrDigit(char)
 5868        * @see     Character#isUnicodeIdentifierPart(char)
 5869        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 5870        * @since   1.1
 5871        */
 5872       public static boolean isJavaIdentifierPart(char ch) {
 5873           return isJavaIdentifierPart((int)ch);
 5874       }
 5875   
 5876       /**
 5877        * Determines if the character (Unicode code point) may be part of a Java
 5878        * identifier as other than the first character.
 5879        * <p>
 5880        * A character may be part of a Java identifier if any of the following
 5881        * are true:
 5882        * <ul>
 5883        * <li>  it is a letter
 5884        * <li>  it is a currency symbol (such as {@code '$'})
 5885        * <li>  it is a connecting punctuation character (such as {@code '_'})
 5886        * <li>  it is a digit
 5887        * <li>  it is a numeric letter (such as a Roman numeral character)
 5888        * <li>  it is a combining mark
 5889        * <li>  it is a non-spacing mark
 5890        * <li> {@link #isIdentifierIgnorable(int)
 5891        * isIdentifierIgnorable(codePoint)} returns {@code true} for
 5892        * the character
 5893        * </ul>
 5894        *
 5895        * @param   codePoint the character (Unicode code point) to be tested.
 5896        * @return {@code true} if the character may be part of a
 5897        *          Java identifier; {@code false} otherwise.
 5898        * @see     Character#isIdentifierIgnorable(int)
 5899        * @see     Character#isJavaIdentifierStart(int)
 5900        * @see     Character#isLetterOrDigit(int)
 5901        * @see     Character#isUnicodeIdentifierPart(int)
 5902        * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
 5903        * @since   1.5
 5904        */
 5905       public static boolean isJavaIdentifierPart(int codePoint) {
 5906           return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
 5907       }
 5908   
 5909       /**
 5910        * Determines if the specified character is permissible as the
 5911        * first character in a Unicode identifier.
 5912        * <p>
 5913        * A character may start a Unicode identifier if and only if
 5914        * one of the following conditions is true:
 5915        * <ul>
 5916        * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
 5917        * <li> {@link #getType(char) getType(ch)} returns
 5918        *      {@code LETTER_NUMBER}.
 5919        * </ul>
 5920        *
 5921        * <p><b>Note:</b> This method cannot handle <a
 5922        * href="#supplementary"> supplementary characters</a>. To support
 5923        * all Unicode characters, including supplementary characters, use
 5924        * the {@link #isUnicodeIdentifierStart(int)} method.
 5925        *
 5926        * @param   ch      the character to be tested.
 5927        * @return  {@code true} if the character may start a Unicode
 5928        *          identifier; {@code false} otherwise.
 5929        * @see     Character#isJavaIdentifierStart(char)
 5930        * @see     Character#isLetter(char)
 5931        * @see     Character#isUnicodeIdentifierPart(char)
 5932        * @since   1.1
 5933        */
 5934       public static boolean isUnicodeIdentifierStart(char ch) {
 5935           return isUnicodeIdentifierStart((int)ch);
 5936       }
 5937   
 5938       /**
 5939        * Determines if the specified character (Unicode code point) is permissible as the
 5940        * first character in a Unicode identifier.
 5941        * <p>
 5942        * A character may start a Unicode identifier if and only if
 5943        * one of the following conditions is true:
 5944        * <ul>
 5945        * <li> {@link #isLetter(int) isLetter(codePoint)}
 5946        *      returns {@code true}
 5947        * <li> {@link #getType(int) getType(codePoint)}
 5948        *      returns {@code LETTER_NUMBER}.
 5949        * </ul>
 5950        * @param   codePoint the character (Unicode code point) to be tested.
 5951        * @return  {@code true} if the character may start a Unicode
 5952        *          identifier; {@code false} otherwise.
 5953        * @see     Character#isJavaIdentifierStart(int)
 5954        * @see     Character#isLetter(int)
 5955        * @see     Character#isUnicodeIdentifierPart(int)
 5956        * @since   1.5
 5957        */
 5958       public static boolean isUnicodeIdentifierStart(int codePoint) {
 5959           return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
 5960       }
 5961   
 5962       /**
 5963        * Determines if the specified character may be part of a Unicode
 5964        * identifier as other than the first character.
 5965        * <p>
 5966        * A character may be part of a Unicode identifier if and only if
 5967        * one of the following statements is true:
 5968        * <ul>
 5969        * <li>  it is a letter
 5970        * <li>  it is a connecting punctuation character (such as {@code '_'})
 5971        * <li>  it is a digit
 5972        * <li>  it is a numeric letter (such as a Roman numeral character)
 5973        * <li>  it is a combining mark
 5974        * <li>  it is a non-spacing mark
 5975        * <li> {@code isIdentifierIgnorable} returns
 5976        * {@code true} for this character.
 5977        * </ul>
 5978        *
 5979        * <p><b>Note:</b> This method cannot handle <a
 5980        * href="#supplementary"> supplementary characters</a>. To support
 5981        * all Unicode characters, including supplementary characters, use
 5982        * the {@link #isUnicodeIdentifierPart(int)} method.
 5983        *
 5984        * @param   ch      the character to be tested.
 5985        * @return  {@code true} if the character may be part of a
 5986        *          Unicode identifier; {@code false} otherwise.
 5987        * @see     Character#isIdentifierIgnorable(char)
 5988        * @see     Character#isJavaIdentifierPart(char)
 5989        * @see     Character#isLetterOrDigit(char)
 5990        * @see     Character#isUnicodeIdentifierStart(char)
 5991        * @since   1.1
 5992        */
 5993       public static boolean isUnicodeIdentifierPart(char ch) {
 5994           return isUnicodeIdentifierPart((int)ch);
 5995       }
 5996   
 5997       /**
 5998        * Determines if the specified character (Unicode code point) may be part of a Unicode
 5999        * identifier as other than the first character.
 6000        * <p>
 6001        * A character may be part of a Unicode identifier if and only if
 6002        * one of the following statements is true:
 6003        * <ul>
 6004        * <li>  it is a letter
 6005        * <li>  it is a connecting punctuation character (such as {@code '_'})
 6006        * <li>  it is a digit
 6007        * <li>  it is a numeric letter (such as a Roman numeral character)
 6008        * <li>  it is a combining mark
 6009        * <li>  it is a non-spacing mark
 6010        * <li> {@code isIdentifierIgnorable} returns
 6011        * {@code true} for this character.
 6012        * </ul>
 6013        * @param   codePoint the character (Unicode code point) to be tested.
 6014        * @return  {@code true} if the character may be part of a
 6015        *          Unicode identifier; {@code false} otherwise.
 6016        * @see     Character#isIdentifierIgnorable(int)
 6017        * @see     Character#isJavaIdentifierPart(int)
 6018        * @see     Character#isLetterOrDigit(int)
 6019        * @see     Character#isUnicodeIdentifierStart(int)
 6020        * @since   1.5
 6021        */
 6022       public static boolean isUnicodeIdentifierPart(int codePoint) {
 6023           return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
 6024       }
 6025   
 6026       /**
 6027        * Determines if the specified character should be regarded as
 6028        * an ignorable character in a Java identifier or a Unicode identifier.
 6029        * <p>
 6030        * The following Unicode characters are ignorable in a Java identifier
 6031        * or a Unicode identifier:
 6032        * <ul>
 6033        * <li>ISO control characters that are not whitespace
 6034        * <ul>
 6035        * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
 6036        * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
 6037        * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
 6038        * </ul>
 6039        *
 6040        * <li>all characters that have the {@code FORMAT} general
 6041        * category value
 6042        * </ul>
 6043        *
 6044        * <p><b>Note:</b> This method cannot handle <a
 6045        * href="#supplementary"> supplementary characters</a>. To support
 6046        * all Unicode characters, including supplementary characters, use
 6047        * the {@link #isIdentifierIgnorable(int)} method.
 6048        *
 6049        * @param   ch      the character to be tested.
 6050        * @return  {@code true} if the character is an ignorable control
 6051        *          character that may be part of a Java or Unicode identifier;
 6052        *           {@code false} otherwise.
 6053        * @see     Character#isJavaIdentifierPart(char)
 6054        * @see     Character#isUnicodeIdentifierPart(char)
 6055        * @since   1.1
 6056        */
 6057       public static boolean isIdentifierIgnorable(char ch) {
 6058           return isIdentifierIgnorable((int)ch);
 6059       }
 6060   
 6061       /**
 6062        * Determines if the specified character (Unicode code point) should be regarded as
 6063        * an ignorable character in a Java identifier or a Unicode identifier.
 6064        * <p>
 6065        * The following Unicode characters are ignorable in a Java identifier
 6066        * or a Unicode identifier:
 6067        * <ul>
 6068        * <li>ISO control characters that are not whitespace
 6069        * <ul>
 6070        * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
 6071        * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
 6072        * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
 6073        * </ul>
 6074        *
 6075        * <li>all characters that have the {@code FORMAT} general
 6076        * category value
 6077        * </ul>
 6078        *
 6079        * @param   codePoint the character (Unicode code point) to be tested.
 6080        * @return  {@code true} if the character is an ignorable control
 6081        *          character that may be part of a Java or Unicode identifier;
 6082        *          {@code false} otherwise.
 6083        * @see     Character#isJavaIdentifierPart(int)
 6084        * @see     Character#isUnicodeIdentifierPart(int)
 6085        * @since   1.5
 6086        */
 6087       public static boolean isIdentifierIgnorable(int codePoint) {
 6088           return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
 6089       }
 6090   
 6091       /**
 6092        * Converts the character argument to lowercase using case
 6093        * mapping information from the UnicodeData file.
 6094        * <p>
 6095        * Note that
 6096        * {@code Character.isLowerCase(Character.toLowerCase(ch))}
 6097        * does not always return {@code true} for some ranges of
 6098        * characters, particularly those that are symbols or ideographs.
 6099        *
 6100        * <p>In general, {@link String#toLowerCase()} should be used to map
 6101        * characters to lowercase. {@code String} case mapping methods
 6102        * have several benefits over {@code Character} case mapping methods.
 6103        * {@code String} case mapping methods can perform locale-sensitive
 6104        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 6105        * the {@code Character} case mapping methods cannot.
 6106        *
 6107        * <p><b>Note:</b> This method cannot handle <a
 6108        * href="#supplementary"> supplementary characters</a>. To support
 6109        * all Unicode characters, including supplementary characters, use
 6110        * the {@link #toLowerCase(int)} method.
 6111        *
 6112        * @param   ch   the character to be converted.
 6113        * @return  the lowercase equivalent of the character, if any;
 6114        *          otherwise, the character itself.
 6115        * @see     Character#isLowerCase(char)
 6116        * @see     String#toLowerCase()
 6117        */
 6118       public static char toLowerCase(char ch) {
 6119           return (char)toLowerCase((int)ch);
 6120       }
 6121   
 6122       /**
 6123        * Converts the character (Unicode code point) argument to
 6124        * lowercase using case mapping information from the UnicodeData
 6125        * file.
 6126        *
 6127        * <p> Note that
 6128        * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
 6129        * does not always return {@code true} for some ranges of
 6130        * characters, particularly those that are symbols or ideographs.
 6131        *
 6132        * <p>In general, {@link String#toLowerCase()} should be used to map
 6133        * characters to lowercase. {@code String} case mapping methods
 6134        * have several benefits over {@code Character} case mapping methods.
 6135        * {@code String} case mapping methods can perform locale-sensitive
 6136        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 6137        * the {@code Character} case mapping methods cannot.
 6138        *
 6139        * @param   codePoint   the character (Unicode code point) to be converted.
 6140        * @return  the lowercase equivalent of the character (Unicode code
 6141        *          point), if any; otherwise, the character itself.
 6142        * @see     Character#isLowerCase(int)
 6143        * @see     String#toLowerCase()
 6144        *
 6145        * @since   1.5
 6146        */
 6147       public static int toLowerCase(int codePoint) {
 6148           return CharacterData.of(codePoint).toLowerCase(codePoint);
 6149       }
 6150   
 6151       /**
 6152        * Converts the character argument to uppercase using case mapping
 6153        * information from the UnicodeData file.
 6154        * <p>
 6155        * Note that
 6156        * {@code Character.isUpperCase(Character.toUpperCase(ch))}
 6157        * does not always return {@code true} for some ranges of
 6158        * characters, particularly those that are symbols or ideographs.
 6159        *
 6160        * <p>In general, {@link String#toUpperCase()} should be used to map
 6161        * characters to uppercase. {@code String} case mapping methods
 6162        * have several benefits over {@code Character} case mapping methods.
 6163        * {@code String} case mapping methods can perform locale-sensitive
 6164        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 6165        * the {@code Character} case mapping methods cannot.
 6166        *
 6167        * <p><b>Note:</b> This method cannot handle <a
 6168        * href="#supplementary"> supplementary characters</a>. To support
 6169        * all Unicode characters, including supplementary characters, use
 6170        * the {@link #toUpperCase(int)} method.
 6171        *
 6172        * @param   ch   the character to be converted.
 6173        * @return  the uppercase equivalent of the character, if any;
 6174        *          otherwise, the character itself.
 6175        * @see     Character#isUpperCase(char)
 6176        * @see     String#toUpperCase()
 6177        */
 6178       public static char toUpperCase(char ch) {
 6179           return (char)toUpperCase((int)ch);
 6180       }
 6181   
 6182       /**
 6183        * Converts the character (Unicode code point) argument to
 6184        * uppercase using case mapping information from the UnicodeData
 6185        * file.
 6186        *
 6187        * <p>Note that
 6188        * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
 6189        * does not always return {@code true} for some ranges of
 6190        * characters, particularly those that are symbols or ideographs.
 6191        *
 6192        * <p>In general, {@link String#toUpperCase()} should be used to map
 6193        * characters to uppercase. {@code String} case mapping methods
 6194        * have several benefits over {@code Character} case mapping methods.
 6195        * {@code String} case mapping methods can perform locale-sensitive
 6196        * mappings, context-sensitive mappings, and 1:M character mappings, whereas
 6197        * the {@code Character} case mapping methods cannot.
 6198        *
 6199        * @param   codePoint   the character (Unicode code point) to be converted.
 6200        * @return  the uppercase equivalent of the character, if any;
 6201        *          otherwise, the character itself.
 6202        * @see     Character#isUpperCase(int)
 6203        * @see     String#toUpperCase()
 6204        *
 6205        * @since   1.5
 6206        */
 6207       public static int toUpperCase(int codePoint) {
 6208           return CharacterData.of(codePoint).toUpperCase(codePoint);
 6209       }
 6210   
 6211       /**
 6212        * Converts the character argument to titlecase using case mapping
 6213        * information from the UnicodeData file. If a character has no
 6214        * explicit titlecase mapping and is not itself a titlecase char
 6215        * according to UnicodeData, then the uppercase mapping is
 6216        * returned as an equivalent titlecase mapping. If the
 6217        * {@code char} argument is already a titlecase
 6218        * {@code char}, the same {@code char} value will be
 6219        * returned.
 6220        * <p>
 6221        * Note that
 6222        * {@code Character.isTitleCase(Character.toTitleCase(ch))}
 6223        * does not always return {@code true} for some ranges of
 6224        * characters.
 6225        *
 6226        * <p><b>Note:</b> This method cannot handle <a
 6227        * href="#supplementary"> supplementary characters</a>. To support
 6228        * all Unicode characters, including supplementary characters, use
 6229        * the {@link #toTitleCase(int)} method.
 6230        *
 6231        * @param   ch   the character to be converted.
 6232        * @return  the titlecase equivalent of the character, if any;
 6233        *          otherwise, the character itself.
 6234        * @see     Character#isTitleCase(char)
 6235        * @see     Character#toLowerCase(char)
 6236        * @see     Character#toUpperCase(char)
 6237        * @since   1.0.2
 6238        */
 6239       public static char toTitleCase(char ch) {
 6240           return (char)toTitleCase((int)ch);
 6241       }
 6242   
 6243       /**
 6244        * Converts the character (Unicode code point) argument to titlecase using case mapping
 6245        * information from the UnicodeData file. If a character has no
 6246        * explicit titlecase mapping and is not itself a titlecase char
 6247        * according to UnicodeData, then the uppercase mapping is
 6248        * returned as an equivalent titlecase mapping. If the
 6249        * character argument is already a titlecase
 6250        * character, the same character value will be
 6251        * returned.
 6252        *
 6253        * <p>Note that
 6254        * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
 6255        * does not always return {@code true} for some ranges of
 6256        * characters.
 6257        *
 6258        * @param   codePoint   the character (Unicode code point) to be converted.
 6259        * @return  the titlecase equivalent of the character, if any;
 6260        *          otherwise, the character itself.
 6261        * @see     Character#isTitleCase(int)
 6262        * @see     Character#toLowerCase(int)
 6263        * @see     Character#toUpperCase(int)
 6264        * @since   1.5
 6265        */
 6266       public static int toTitleCase(int codePoint) {
 6267           return CharacterData.of(codePoint).toTitleCase(codePoint);
 6268       }
 6269   
 6270       /**
 6271        * Returns the numeric value of the character {@code ch} in the
 6272        * specified radix.
 6273        * <p>
 6274        * If the radix is not in the range {@code MIN_RADIX} &le;
 6275        * {@code radix} &le; {@code MAX_RADIX} or if the
 6276        * value of {@code ch} is not a valid digit in the specified
 6277        * radix, {@code -1} is returned. A character is a valid digit
 6278        * if at least one of the following is true:
 6279        * <ul>
 6280        * <li>The method {@code isDigit} is {@code true} of the character
 6281        *     and the Unicode decimal digit value of the character (or its
 6282        *     single-character decomposition) is less than the specified radix.
 6283        *     In this case the decimal digit value is returned.
 6284        * <li>The character is one of the uppercase Latin letters
 6285        *     {@code 'A'} through {@code 'Z'} and its code is less than
 6286        *     {@code radix + 'A' - 10}.
 6287        *     In this case, {@code ch - 'A' + 10}
 6288        *     is returned.
 6289        * <li>The character is one of the lowercase Latin letters
 6290        *     {@code 'a'} through {@code 'z'} and its code is less than
 6291        *     {@code radix + 'a' - 10}.
 6292        *     In this case, {@code ch - 'a' + 10}
 6293        *     is returned.
 6294        * <li>The character is one of the fullwidth uppercase Latin letters A
 6295        *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
 6296        *     and its code is less than
 6297        *     {@code radix + '\u005CuFF21' - 10}.
 6298        *     In this case, {@code ch - '\u005CuFF21' + 10}
 6299        *     is returned.
 6300        * <li>The character is one of the fullwidth lowercase Latin letters a
 6301        *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
 6302        *     and its code is less than
 6303        *     {@code radix + '\u005CuFF41' - 10}.
 6304        *     In this case, {@code ch - '\u005CuFF41' + 10}
 6305        *     is returned.
 6306        * </ul>
 6307        *
 6308        * <p><b>Note:</b> This method cannot handle <a
 6309        * href="#supplementary"> supplementary characters</a>. To support
 6310        * all Unicode characters, including supplementary characters, use
 6311        * the {@link #digit(int, int)} method.
 6312        *
 6313        * @param   ch      the character to be converted.
 6314        * @param   radix   the radix.
 6315        * @return  the numeric value represented by the character in the
 6316        *          specified radix.
 6317        * @see     Character#forDigit(int, int)
 6318        * @see     Character#isDigit(char)
 6319        */
 6320       public static int digit(char ch, int radix) {
 6321           return digit((int)ch, radix);
 6322       }
 6323   
 6324       /**
 6325        * Returns the numeric value of the specified character (Unicode
 6326        * code point) in the specified radix.
 6327        *
 6328        * <p>If the radix is not in the range {@code MIN_RADIX} &le;
 6329        * {@code radix} &le; {@code MAX_RADIX} or if the
 6330        * character is not a valid digit in the specified
 6331        * radix, {@code -1} is returned. A character is a valid digit
 6332        * if at least one of the following is true:
 6333        * <ul>
 6334        * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
 6335        *     and the Unicode decimal digit value of the character (or its
 6336        *     single-character decomposition) is less than the specified radix.
 6337        *     In this case the decimal digit value is returned.
 6338        * <li>The character is one of the uppercase Latin letters
 6339        *     {@code 'A'} through {@code 'Z'} and its code is less than
 6340        *     {@code radix + 'A' - 10}.
 6341        *     In this case, {@code codePoint - 'A' + 10}
 6342        *     is returned.
 6343        * <li>The character is one of the lowercase Latin letters
 6344        *     {@code 'a'} through {@code 'z'} and its code is less than
 6345        *     {@code radix + 'a' - 10}.
 6346        *     In this case, {@code codePoint - 'a' + 10}
 6347        *     is returned.
 6348        * <li>The character is one of the fullwidth uppercase Latin letters A
 6349        *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
 6350        *     and its code is less than
 6351        *     {@code radix + '\u005CuFF21' - 10}.
 6352        *     In this case,
 6353        *     {@code codePoint - '\u005CuFF21' + 10}
 6354        *     is returned.
 6355        * <li>The character is one of the fullwidth lowercase Latin letters a
 6356        *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
 6357        *     and its code is less than
 6358        *     {@code radix + '\u005CuFF41' - 10}.
 6359        *     In this case,
 6360        *     {@code codePoint - '\u005CuFF41' + 10}
 6361        *     is returned.
 6362        * </ul>
 6363        *
 6364        * @param   codePoint the character (Unicode code point) to be converted.
 6365        * @param   radix   the radix.
 6366        * @return  the numeric value represented by the character in the
 6367        *          specified radix.
 6368        * @see     Character#forDigit(int, int)
 6369        * @see     Character#isDigit(int)
 6370        * @since   1.5
 6371        */
 6372       public static int digit(int codePoint, int radix) {
 6373           return CharacterData.of(codePoint).digit(codePoint, radix);
 6374       }
 6375   
 6376       /**
 6377        * Returns the {@code int} value that the specified Unicode
 6378        * character represents. For example, the character
 6379        * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
 6380        * an int with a value of 50.
 6381        * <p>
 6382        * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
 6383        * {@code '\u005Cu005A'}), lowercase
 6384        * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
 6385        * full width variant ({@code '\u005CuFF21'} through
 6386        * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
 6387        * {@code '\u005CuFF5A'}) forms have numeric values from 10
 6388        * through 35. This is independent of the Unicode specification,
 6389        * which does not assign numeric values to these {@code char}
 6390        * values.
 6391        * <p>
 6392        * If the character does not have a numeric value, then -1 is returned.
 6393        * If the character has a numeric value that cannot be represented as a
 6394        * nonnegative integer (for example, a fractional value), then -2
 6395        * is returned.
 6396        *
 6397        * <p><b>Note:</b> This method cannot handle <a
 6398        * href="#supplementary"> supplementary characters</a>. To support
 6399        * all Unicode characters, including supplementary characters, use
 6400        * the {@link #getNumericValue(int)} method.
 6401        *
 6402        * @param   ch      the character to be converted.
 6403        * @return  the numeric value of the character, as a nonnegative {@code int}
 6404        *           value; -2 if the character has a numeric value that is not a
 6405        *          nonnegative integer; -1 if the character has no numeric value.
 6406        * @see     Character#forDigit(int, int)
 6407        * @see     Character#isDigit(char)
 6408        * @since   1.1
 6409        */
 6410       public static int getNumericValue(char ch) {
 6411           return getNumericValue((int)ch);
 6412       }
 6413   
 6414       /**
 6415        * Returns the {@code int} value that the specified
 6416        * character (Unicode code point) represents. For example, the character
 6417        * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
 6418        * an {@code int} with a value of 50.
 6419        * <p>
 6420        * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
 6421        * {@code '\u005Cu005A'}), lowercase
 6422        * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
 6423        * full width variant ({@code '\u005CuFF21'} through
 6424        * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
 6425        * {@code '\u005CuFF5A'}) forms have numeric values from 10
 6426        * through 35. This is independent of the Unicode specification,
 6427        * which does not assign numeric values to these {@code char}
 6428        * values.
 6429        * <p>
 6430        * If the character does not have a numeric value, then -1 is returned.
 6431        * If the character has a numeric value that cannot be represented as a
 6432        * nonnegative integer (for example, a fractional value), then -2
 6433        * is returned.
 6434        *
 6435        * @param   codePoint the character (Unicode code point) to be converted.
 6436        * @return  the numeric value of the character, as a nonnegative {@code int}
 6437        *          value; -2 if the character has a numeric value that is not a
 6438        *          nonnegative integer; -1 if the character has no numeric value.
 6439        * @see     Character#forDigit(int, int)
 6440        * @see     Character#isDigit(int)
 6441        * @since   1.5
 6442        */
 6443       public static int getNumericValue(int codePoint) {
 6444           return CharacterData.of(codePoint).getNumericValue(codePoint);
 6445       }
 6446   
 6447       /**
 6448        * Determines if the specified character is ISO-LATIN-1 white space.
 6449        * This method returns {@code true} for the following five
 6450        * characters only:
 6451        * <table>
 6452        * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
 6453        *     <td>{@code HORIZONTAL TABULATION}</td></tr>
 6454        * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
 6455        *     <td>{@code NEW LINE}</td></tr>
 6456        * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
 6457        *     <td>{@code FORM FEED}</td></tr>
 6458        * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
 6459        *     <td>{@code CARRIAGE RETURN}</td></tr>
 6460        * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
 6461        *     <td>{@code SPACE}</td></tr>
 6462        * </table>
 6463        *
 6464        * @param      ch   the character to be tested.
 6465        * @return     {@code true} if the character is ISO-LATIN-1 white
 6466        *             space; {@code false} otherwise.
 6467        * @see        Character#isSpaceChar(char)
 6468        * @see        Character#isWhitespace(char)
 6469        * @deprecated Replaced by isWhitespace(char).
 6470        */
 6471       @Deprecated
 6472       public static boolean isSpace(char ch) {
 6473           return (ch <= 0x0020) &&
 6474               (((((1L << 0x0009) |
 6475               (1L << 0x000A) |
 6476               (1L << 0x000C) |
 6477               (1L << 0x000D) |
 6478               (1L << 0x0020)) >> ch) & 1L) != 0);
 6479       }
 6480   
 6481   
 6482       /**
 6483        * Determines if the specified character is a Unicode space character.
 6484        * A character is considered to be a space character if and only if
 6485        * it is specified to be a space character by the Unicode Standard. This
 6486        * method returns true if the character's general category type is any of
 6487        * the following:
 6488        * <ul>
 6489        * <li> {@code SPACE_SEPARATOR}
 6490        * <li> {@code LINE_SEPARATOR}
 6491        * <li> {@code PARAGRAPH_SEPARATOR}
 6492        * </ul>
 6493        *
 6494        * <p><b>Note:</b> This method cannot handle <a
 6495        * href="#supplementary"> supplementary characters</a>. To support
 6496        * all Unicode characters, including supplementary characters, use
 6497        * the {@link #isSpaceChar(int)} method.
 6498        *
 6499        * @param   ch      the character to be tested.
 6500        * @return  {@code true} if the character is a space character;
 6501        *          {@code false} otherwise.
 6502        * @see     Character#isWhitespace(char)
 6503        * @since   1.1
 6504        */
 6505       public static boolean isSpaceChar(char ch) {
 6506           return isSpaceChar((int)ch);
 6507       }
 6508   
 6509       /**
 6510        * Determines if the specified character (Unicode code point) is a
 6511        * Unicode space character.  A character is considered to be a
 6512        * space character if and only if it is specified to be a space
 6513        * character by the Unicode Standard. This method returns true if
 6514        * the character's general category type is any of the following:
 6515        *
 6516        * <ul>
 6517        * <li> {@link #SPACE_SEPARATOR}
 6518        * <li> {@link #LINE_SEPARATOR}
 6519        * <li> {@link #PARAGRAPH_SEPARATOR}
 6520        * </ul>
 6521        *
 6522        * @param   codePoint the character (Unicode code point) to be tested.
 6523        * @return  {@code true} if the character is a space character;
 6524        *          {@code false} otherwise.
 6525        * @see     Character#isWhitespace(int)
 6526        * @since   1.5
 6527        */
 6528       public static boolean isSpaceChar(int codePoint) {
 6529           return ((((1 << Character.SPACE_SEPARATOR) |
 6530                     (1 << Character.LINE_SEPARATOR) |
 6531                     (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
 6532               != 0;
 6533       }
 6534   
 6535       /**
 6536        * Determines if the specified character is white space according to Java.
 6537        * A character is a Java whitespace character if and only if it satisfies
 6538        * one of the following criteria:
 6539        * <ul>
 6540        * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
 6541        *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
 6542        *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
 6543        *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
 6544        * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
 6545        * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
 6546        * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
 6547        * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
 6548        * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
 6549        * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
 6550        * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
 6551        * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
 6552        * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
 6553        * </ul>
 6554        *
 6555        * <p><b>Note:</b> This method cannot handle <a
 6556        * href="#supplementary"> supplementary characters</a>. To support
 6557        * all Unicode characters, including supplementary characters, use
 6558        * the {@link #isWhitespace(int)} method.
 6559        *
 6560        * @param   ch the character to be tested.
 6561        * @return  {@code true} if the character is a Java whitespace
 6562        *          character; {@code false} otherwise.
 6563        * @see     Character#isSpaceChar(char)
 6564        * @since   1.1
 6565        */
 6566       public static boolean isWhitespace(char ch) {
 6567           return isWhitespace((int)ch);
 6568       }
 6569   
 6570       /**
 6571        * Determines if the specified character (Unicode code point) is
 6572        * white space according to Java.  A character is a Java
 6573        * whitespace character if and only if it satisfies one of the
 6574        * following criteria:
 6575        * <ul>
 6576        * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
 6577        *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
 6578        *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
 6579        *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
 6580        * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
 6581        * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
 6582        * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
 6583        * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
 6584        * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
 6585        * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
 6586        * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
 6587        * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
 6588        * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
 6589        * </ul>
             *
 6592        * @param   codePoint the character (Unicode code point) to be tested.
 6593        * @return  {@code true} if the character is a Java whitespace
 6594        *          character; {@code false} otherwise.
 6595        * @see     Character#isSpaceChar(int)
 6596        * @since   1.5
 6597        */
 6598       public static boolean isWhitespace(int codePoint) {
 6599           return CharacterData.of(codePoint).isWhitespace(codePoint);
 6600       }
 6601   
 6602       /**
 6603        * Determines if the specified character is an ISO control
 6604        * character.  A character is considered to be an ISO control
 6605        * character if its code is in the range {@code '\u005Cu0000'}
 6606        * through {@code '\u005Cu001F'} or in the range
 6607        * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
 6608        *
 6609        * <p><b>Note:</b> This method cannot handle <a
 6610        * href="#supplementary"> supplementary characters</a>. To support
 6611        * all Unicode characters, including supplementary characters, use
 6612        * the {@link #isISOControl(int)} method.
 6613        *
 6614        * @param   ch      the character to be tested.
 6615        * @return  {@code true} if the character is an ISO control character;
 6616        *          {@code false} otherwise.
 6617        *
 6618        * @see     Character#isSpaceChar(char)
 6619        * @see     Character#isWhitespace(char)
 6620        * @since   1.1
 6621        */
 6622       public static boolean isISOControl(char ch) {
 6623           return isISOControl((int)ch);
 6624       }
 6625   
 6626       /**
 6627        * Determines if the referenced character (Unicode code point) is an ISO control
 6628        * character.  A character is considered to be an ISO control
 6629        * character if its code is in the range {@code '\u005Cu0000'}
 6630        * through {@code '\u005Cu001F'} or in the range
 6631        * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
 6632        *
 6633        * @param   codePoint the character (Unicode code point) to be tested.
 6634        * @return  {@code true} if the character is an ISO control character;
 6635        *          {@code false} otherwise.
 6636        * @see     Character#isSpaceChar(int)
 6637        * @see     Character#isWhitespace(int)
 6638        * @since   1.5
 6639        */
 6640       public static boolean isISOControl(int codePoint) {
 6641           // Optimized form of:
 6642           //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
 6643           //     (codePoint >= 0x7F && codePoint <= 0x9F);
 6644           return codePoint <= 0x9F &&
 6645               (codePoint >= 0x7F || (codePoint >>> 5 == 0));
 6646       }
 6647   
 6648       /**
 6649        * Returns a value indicating a character's general category.
 6650        *
 6651        * <p><b>Note:</b> This method cannot handle <a
 6652        * href="#supplementary"> supplementary characters</a>. To support
 6653        * all Unicode characters, including supplementary characters, use
 6654        * the {@link #getType(int)} method.
 6655        *
 6656        * @param   ch      the character to be tested.
 6657        * @return  a value of type {@code int} representing the
 6658        *          character's general category.
 6659        * @see     Character#COMBINING_SPACING_MARK
 6660        * @see     Character#CONNECTOR_PUNCTUATION
 6661        * @see     Character#CONTROL
 6662        * @see     Character#CURRENCY_SYMBOL
 6663        * @see     Character#DASH_PUNCTUATION
 6664        * @see     Character#DECIMAL_DIGIT_NUMBER
 6665        * @see     Character#ENCLOSING_MARK
 6666        * @see     Character#END_PUNCTUATION
 6667        * @see     Character#FINAL_QUOTE_PUNCTUATION
 6668        * @see     Character#FORMAT
 6669        * @see     Character#INITIAL_QUOTE_PUNCTUATION
 6670        * @see     Character#LETTER_NUMBER
 6671        * @see     Character#LINE_SEPARATOR
 6672        * @see     Character#LOWERCASE_LETTER
 6673        * @see     Character#MATH_SYMBOL
 6674        * @see     Character#MODIFIER_LETTER
 6675        * @see     Character#MODIFIER_SYMBOL
 6676        * @see     Character#NON_SPACING_MARK
 6677        * @see     Character#OTHER_LETTER
 6678        * @see     Character#OTHER_NUMBER
 6679        * @see     Character#OTHER_PUNCTUATION
 6680        * @see     Character#OTHER_SYMBOL
 6681        * @see     Character#PARAGRAPH_SEPARATOR
 6682        * @see     Character#PRIVATE_USE
 6683        * @see     Character#SPACE_SEPARATOR
 6684        * @see     Character#START_PUNCTUATION
 6685        * @see     Character#SURROGATE
 6686        * @see     Character#TITLECASE_LETTER
 6687        * @see     Character#UNASSIGNED
 6688        * @see     Character#UPPERCASE_LETTER
 6689        * @since   1.1
 6690        */
 6691       public static int getType(char ch) {
 6692           return getType((int)ch);
 6693       }
 6694   
 6695       /**
 6696        * Returns a value indicating a character's general category.
 6697        *
 6698        * @param   codePoint the character (Unicode code point) to be tested.
 6699        * @return  a value of type {@code int} representing the
 6700        *          character's general category.
 6701        * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
 6702        * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
 6703        * @see     Character#CONTROL CONTROL
 6704        * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
 6705        * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
 6706        * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
 6707        * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
 6708        * @see     Character#END_PUNCTUATION END_PUNCTUATION
 6709        * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
 6710        * @see     Character#FORMAT FORMAT
 6711        * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
 6712        * @see     Character#LETTER_NUMBER LETTER_NUMBER
 6713        * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
 6714        * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
 6715        * @see     Character#MATH_SYMBOL MATH_SYMBOL
 6716        * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
 6717        * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
 6718        * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
 6719        * @see     Character#OTHER_LETTER OTHER_LETTER
 6720        * @see     Character#OTHER_NUMBER OTHER_NUMBER
 6721        * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
 6722        * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
 6723        * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
 6724        * @see     Character#PRIVATE_USE PRIVATE_USE
 6725        * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
 6726        * @see     Character#START_PUNCTUATION START_PUNCTUATION
 6727        * @see     Character#SURROGATE SURROGATE
 6728        * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
 6729        * @see     Character#UNASSIGNED UNASSIGNED
 6730        * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
 6731        * @since   1.5
 6732        */
 6733       public static int getType(int codePoint) {
 6734           return CharacterData.of(codePoint).getType(codePoint);
 6735       }
 6736   
 6737       /**
 6738        * Determines the character representation for a specific digit in
 6739        * the specified radix. If the value of {@code radix} is not a
 6740        * valid radix, or the value of {@code digit} is not a valid
 6741        * digit in the specified radix, the null character
 6742        * ({@code '\u005Cu0000'}) is returned.
 6743        * <p>
 6744        * The {@code radix} argument is valid if it is greater than or
 6745        * equal to {@code MIN_RADIX} and less than or equal to
 6746        * {@code MAX_RADIX}. The {@code digit} argument is valid if
 6747        * {@code 0 <= digit < radix}.
 6748        * <p>
 6749        * If the digit is less than 10, then
 6750        * {@code '0' + digit} is returned. Otherwise, the value
 6751        * {@code 'a' + digit - 10} is returned.
 6752        *
 6753        * @param   digit   the number to convert to a character.
 6754        * @param   radix   the radix.
 6755        * @return  the {@code char} representation of the specified digit
 6756        *          in the specified radix.
 6757        * @see     Character#MIN_RADIX
 6758        * @see     Character#MAX_RADIX
 6759        * @see     Character#digit(char, int)
 6760        */
 6761       public static char forDigit(int digit, int radix) {
 6762           if ((digit >= radix) || (digit < 0)) {
 6763               return '\0';
 6764           }
 6765           if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
 6766               return '\0';
 6767           }
 6768           if (digit < 10) {
 6769               return (char)('0' + digit);
 6770           }
 6771           return (char)('a' - 10 + digit);
 6772       }
 6773   
 6774       /**
 6775        * Returns the Unicode directionality property for the given
 6776        * character.  Character directionality is used to calculate the
 6777        * visual ordering of text. The directionality value of undefined
 6778        * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
 6779        *
 6780        * <p><b>Note:</b> This method cannot handle <a
 6781        * href="#supplementary"> supplementary characters</a>. To support
 6782        * all Unicode characters, including supplementary characters, use
 6783        * the {@link #getDirectionality(int)} method.
 6784        *
 6785        * @param  ch {@code char} for which the directionality property
 6786        *            is requested.
 6787        * @return the directionality property of the {@code char} value.
 6788        *
 6789        * @see Character#DIRECTIONALITY_UNDEFINED
 6790        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
 6791        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
 6792        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
 6793        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
 6794        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 6795        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 6796        * @see Character#DIRECTIONALITY_ARABIC_NUMBER
 6797        * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 6798        * @see Character#DIRECTIONALITY_NONSPACING_MARK
 6799        * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
 6800        * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
 6801        * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
 6802        * @see Character#DIRECTIONALITY_WHITESPACE
 6803        * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
 6804        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
 6805        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
 6806        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
 6807        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
 6808        * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
 6809        * @since 1.4
 6810        */
 6811       public static byte getDirectionality(char ch) {
 6812           return getDirectionality((int)ch);
 6813       }
 6814   
 6815       /**
 6816        * Returns the Unicode directionality property for the given
 6817        * character (Unicode code point).  Character directionality is
 6818        * used to calculate the visual ordering of text. The
 6819        * directionality value of undefined character is {@link
 6820        * #DIRECTIONALITY_UNDEFINED}.
 6821        *
 6822        * @param   codePoint the character (Unicode code point) for which
 6823        *          the directionality property is requested.
 6824        * @return the directionality property of the character.
 6825        *
 6826        * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
 6827        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
 6828        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
 6829        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
 6830        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
 6831        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 6832        * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 6833        * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
 6834        * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 6835        * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
 6836        * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
 6837        * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
 6838        * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
 6839        * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
 6840        * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
 6841        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
 6842        * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
 6843        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
 6844        * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
 6845        * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
 6846        * @since    1.5
 6847        */
 6848       public static byte getDirectionality(int codePoint) {
 6849           return CharacterData.of(codePoint).getDirectionality(codePoint);
 6850       }
 6851   
 6852       /**
 6853        * Determines whether the character is mirrored according to the
 6854        * Unicode specification.  Mirrored characters should have their
 6855        * glyphs horizontally mirrored when displayed in text that is
 6856        * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
 6857        * PARENTHESIS is semantically defined to be an <i>opening
 6858        * parenthesis</i>.  This will appear as a "(" in text that is
 6859        * left-to-right but as a ")" in text that is right-to-left.
 6860        *
 6861        * <p><b>Note:</b> This method cannot handle <a
 6862        * href="#supplementary"> supplementary characters</a>. To support
 6863        * all Unicode characters, including supplementary characters, use
 6864        * the {@link #isMirrored(int)} method.
 6865        *
 6866        * @param  ch {@code char} for which the mirrored property is requested
 6867        * @return {@code true} if the char is mirrored, {@code false}
 6868        *         if the {@code char} is not mirrored or is not defined.
 6869        * @since 1.4
 6870        */
 6871       public static boolean isMirrored(char ch) {
 6872           return isMirrored((int)ch);
 6873       }
 6874   
 6875       /**
 6876        * Determines whether the specified character (Unicode code point)
 6877        * is mirrored according to the Unicode specification.  Mirrored
 6878        * characters should have their glyphs horizontally mirrored when
 6879        * displayed in text that is right-to-left.  For example,
 6880        * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
 6881        * defined to be an <i>opening parenthesis</i>.  This will appear
 6882        * as a "(" in text that is left-to-right but as a ")" in text
 6883        * that is right-to-left.
 6884        *
 6885        * @param   codePoint the character (Unicode code point) to be tested.
 6886        * @return  {@code true} if the character is mirrored, {@code false}
 6887        *          if the character is not mirrored or is not defined.
 6888        * @since   1.5
 6889        */
 6890       public static boolean isMirrored(int codePoint) {
 6891           return CharacterData.of(codePoint).isMirrored(codePoint);
 6892       }
 6893   
 6894       /**
 6895        * Compares two {@code Character} objects numerically.
 6896        *
 6897        * @param   anotherCharacter   the {@code Character} to be compared.
             *
 6899        * @return  the value {@code 0} if the argument {@code Character}
 6900        *          is equal to this {@code Character}; a value less than
 6901        *          {@code 0} if this {@code Character} is numerically less
 6902        *          than the {@code Character} argument; and a value greater than
 6903        *          {@code 0} if this {@code Character} is numerically greater
 6904        *          than the {@code Character} argument (unsigned comparison).
 6905        *          Note that this is strictly a numerical comparison; it is not
 6906        *          locale-dependent.
 6907        * @since   1.2
 6908        */
 6909       public int compareTo(Character anotherCharacter) {
 6910           return compare(this.value, anotherCharacter.value);
 6911       }
 6912   
 6913       /**
 6914        * Compares two {@code char} values numerically.
 6915        * The value returned is identical to what would be returned by:
 6916        * <pre>
 6917        *    Character.valueOf(x).compareTo(Character.valueOf(y))
 6918        * </pre>
 6919        *
 6920        * @param  x the first {@code char} to compare
 6921        * @param  y the second {@code char} to compare
 6922        * @return the value {@code 0} if {@code x == y};
 6923        *         a value less than {@code 0} if {@code x < y}; and
 6924        *         a value greater than {@code 0} if {@code x > y}
 6925        * @since 1.7
 6926        */
 6927       public static int compare(char x, char y) {
 6928           return x - y;
 6929       }
 6930   
 6931       /**
 6932        * Converts the character (Unicode code point) argument to uppercase using
 6933        * information from the UnicodeData file.
             *
 6936        * @param   codePoint   the character (Unicode code point) to be converted.
 6937        * @return  either the uppercase equivalent of the character, if
 6938        *          any, or an error flag ({@code Character.ERROR})
 6939        *          that indicates that a 1:M {@code char} mapping exists.
 6940        * @see     Character#isLowerCase(char)
 6941        * @see     Character#isUpperCase(char)
 6942        * @see     Character#toLowerCase(char)
 6943        * @see     Character#toTitleCase(char)
 6944        * @since 1.4
 6945        */
 6946       static int toUpperCaseEx(int codePoint) {
 6947           assert isValidCodePoint(codePoint);
 6948           return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
 6949       }
 6950   
 6951       /**
 6952        * Converts the character (Unicode code point) argument to uppercase using case
 6953        * mapping information from the SpecialCasing file in the Unicode
 6954        * specification. If a character has no explicit uppercase
 6955        * mapping, then the {@code char} itself is returned in the
 6956        * {@code char[]}.
 6957        *
 6958        * @param   codePoint   the character (Unicode code point) to be converted.
 6959        * @return a {@code char[]} with the uppercased character.
 6960        * @since 1.4
 6961        */
 6962       static char[] toUpperCaseCharArray(int codePoint) {
 6963           // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
 6964           assert isBmpCodePoint(codePoint);
 6965           return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
 6966       }
 6967   
            /**
             * The number of bits used to represent a {@code char} value in unsigned
             * binary form, constant {@code 16}.
             *
             * @since 1.5
             */
            public static final int SIZE = 16;
 6975   
 6976       /**
 6977        * Returns the value obtained by reversing the order of the bytes in the
 6978        * specified <tt>char</tt> value.
             *
             * @param  ch the {@code char} whose bytes are to be reversed.
 6980        * @return the value obtained by reversing (or, equivalently, swapping)
 6981        *     the bytes in the specified <tt>char</tt> value.
 6982        * @since 1.5
 6983        */
 6984       public static char reverseBytes(char ch) {
 6985           return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
 6986       }
 6987   
 6988       /**
 6989        * Returns the Unicode name of the specified character
 6990        * {@code codePoint}, or null if the code point is
 6991        * {@link #UNASSIGNED unassigned}.
 6992        * <p>
 6993        * Note: if the specified character is not assigned a name by
 6994        * the <i>UnicodeData</i> file (part of the Unicode Character
 6995        * Database maintained by the Unicode Consortium), the returned
             * name is the same as the result of the expression:
 6997        *
 6998        * <blockquote>{@code
 6999        *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
 7000        *     + " "
 7001        *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
 7002        *
 7003        * }</blockquote>
 7004        *
 7005        * @param  codePoint the character (Unicode code point)
 7006        *
 7007        * @return the Unicode name of the specified character, or null if
 7008        *         the code point is unassigned.
 7009        *
 7010        * @exception IllegalArgumentException if the specified
 7011        *            {@code codePoint} is not a valid Unicode
 7012        *            code point.
 7013        *
 7014        * @since 1.7
 7015        */
 7016       public static String getName(int codePoint) {
 7017           if (!isValidCodePoint(codePoint)) {
 7018               throw new IllegalArgumentException();
 7019           }
 7020           String name = CharacterName.get(codePoint);
 7021           if (name != null)
 7022               return name;
 7023           if (getType(codePoint) == UNASSIGNED)
 7024               return null;
 7025           UnicodeBlock block = UnicodeBlock.of(codePoint);
 7026           if (block != null)
 7027               return block.toString().replace('_', ' ') + " "
 7028                      + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
 7029           // should never come here
 7030           return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
 7031       }
 7032   }

Home » openjdk-7 » java » lang » [javadoc | source]