1 /*
2 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.util.Arrays;
29 import java.util.Map;
30 import java.util.HashMap;
31 import java.util.Locale;
32
33 /**
34 * The {@code Character} class wraps a value of the primitive
35 * type {@code char} in an object. An object of type
36 * {@code Character} contains a single field whose type is
37 * {@code char}.
38 * <p>
39 * In addition, this class provides several methods for determining
40 * a character's category (lowercase letter, digit, etc.) and for converting
41 * characters from uppercase to lowercase and vice versa.
42 * <p>
43 * Character information is based on the Unicode Standard, version 6.0.0.
44 * <p>
45 * The methods and data of class {@code Character} are defined by
46 * the information in the <i>UnicodeData</i> file that is part of the
47 * Unicode Character Database maintained by the Unicode
48 * Consortium. This file specifies various properties including name
49 * and general category for every defined Unicode code point or
50 * character range.
51 * <p>
52 * The file and its description are available from the Unicode Consortium at:
53 * <ul>
54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
55 * </ul>
56 *
57 * <h4><a name="unicode">Unicode Character Representations</a></h4>
58 *
59 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
61 * original Unicode specification, which defined characters as
62 * fixed-width 16-bit entities. The Unicode Standard has since been
63 * changed to allow for characters whose representation requires more
64 * than 16 bits. The range of legal <em>code point</em>s is now
65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
66 * (Refer to the <a
67 * href="http://www.unicode.org/reports/tr27/#notation"><i>
68 * definition</i></a> of the U+<i>n</i> notation in the Unicode
69 * Standard.)
70 *
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
73 * <a name="supplementary">Characters</a> whose code points are greater
74 * than U+FFFF are called <em>supplementary character</em>s. The Java
75 * platform uses the UTF-16 representation in {@code char} arrays and
76 * in the {@code String} and {@code StringBuffer} classes. In
77 * this representation, supplementary characters are represented as a pair
78 * of {@code char} values, the first from the <em>high-surrogates</em>
 * range (\u005CuD800-\u005CuDBFF), the second from the
 * <em>low-surrogates</em> range (\u005CuDC00-\u005CuDFFF).
81 *
82 * <p>A {@code char} value, therefore, represents Basic
83 * Multilingual Plane (BMP) code points, including the surrogate
84 * code points, or code units of the UTF-16 encoding. An
85 * {@code int} value represents all Unicode code points,
86 * including supplementary code points. The lower (least significant)
87 * 21 bits of {@code int} are used to represent Unicode code
88 * points and the upper (most significant) 11 bits must be zero.
89 * Unless otherwise specified, the behavior with respect to
90 * supplementary characters and surrogate {@code char} values is
91 * as follows:
92 *
93 * <ul>
94 * <li>The methods that only accept a {@code char} value cannot support
95 * supplementary characters. They treat {@code char} values from the
96 * surrogate ranges as undefined characters. For example,
97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
98 * this specific value if followed by any low-surrogate value in a string
99 * would represent a letter.
100 *
101 * <li>The methods that accept an {@code int} value support all
102 * Unicode characters, including supplementary characters. For
103 * example, {@code Character.isLetter(0x2F81A)} returns
104 * {@code true} because the code point value represents a letter
105 * (a CJK ideograph).
106 * </ul>
107 *
108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
109 * used for character values in the range between U+0000 and U+10FFFF,
110 * and <em>Unicode code unit</em> is used for 16-bit
111 * {@code char} values that are code units of the <em>UTF-16</em>
112 * encoding. For more information on Unicode terminology, refer to the
113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
114 *
115 * @author Lee Boynton
116 * @author Guy Steele
117 * @author Akira Tanaka
118 * @author Martin Buchholz
119 * @author Ulf Zibis
120 * @since 1.0
121 */
122 public final
123 class Character implements java.io.Serializable, Comparable<Character> {
124 /**
125 * The minimum radix available for conversion to and from strings.
126 * The constant value of this field is the smallest value permitted
127 * for the radix argument in radix-conversion methods such as the
128 * {@code digit} method, the {@code forDigit} method, and the
129 * {@code toString} method of class {@code Integer}.
130 *
131 * @see Character#digit(char, int)
132 * @see Character#forDigit(int, int)
133 * @see Integer#toString(int, int)
134 * @see Integer#valueOf(String)
135 */
136 public static final int MIN_RADIX = 2;
137
138 /**
139 * The maximum radix available for conversion to and from strings.
140 * The constant value of this field is the largest value permitted
141 * for the radix argument in radix-conversion methods such as the
142 * {@code digit} method, the {@code forDigit} method, and the
143 * {@code toString} method of class {@code Integer}.
144 *
145 * @see Character#digit(char, int)
146 * @see Character#forDigit(int, int)
147 * @see Integer#toString(int, int)
148 * @see Integer#valueOf(String)
149 */
150 public static final int MAX_RADIX = 36;
151
152 /**
153 * The constant value of this field is the smallest value of type
154 * {@code char}, {@code '\u005Cu0000'}.
155 *
156 * @since 1.0.2
157 */
158 public static final char MIN_VALUE = '\u0000';
159
160 /**
161 * The constant value of this field is the largest value of type
162 * {@code char}, {@code '\u005CuFFFF'}.
163 *
164 * @since 1.0.2
165 */
166 public static final char MAX_VALUE = '\uFFFF';
167
168 /**
169 * The {@code Class} instance representing the primitive type
170 * {@code char}.
171 *
172 * @since 1.1
173 */
174 @SuppressWarnings("unchecked")
175 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
176
177 /*
178 * Normative general types
179 */
180
181 /*
182 * General character types
183 */
184
185 /**
186 * General category "Cn" in the Unicode specification.
187 * @since 1.1
188 */
189 public static final byte UNASSIGNED = 0;
190
191 /**
192 * General category "Lu" in the Unicode specification.
193 * @since 1.1
194 */
195 public static final byte UPPERCASE_LETTER = 1;
196
197 /**
198 * General category "Ll" in the Unicode specification.
199 * @since 1.1
200 */
201 public static final byte LOWERCASE_LETTER = 2;
202
203 /**
204 * General category "Lt" in the Unicode specification.
205 * @since 1.1
206 */
207 public static final byte TITLECASE_LETTER = 3;
208
209 /**
210 * General category "Lm" in the Unicode specification.
211 * @since 1.1
212 */
213 public static final byte MODIFIER_LETTER = 4;
214
215 /**
216 * General category "Lo" in the Unicode specification.
217 * @since 1.1
218 */
219 public static final byte OTHER_LETTER = 5;
220
221 /**
222 * General category "Mn" in the Unicode specification.
223 * @since 1.1
224 */
225 public static final byte NON_SPACING_MARK = 6;
226
227 /**
228 * General category "Me" in the Unicode specification.
229 * @since 1.1
230 */
231 public static final byte ENCLOSING_MARK = 7;
232
233 /**
234 * General category "Mc" in the Unicode specification.
235 * @since 1.1
236 */
237 public static final byte COMBINING_SPACING_MARK = 8;
238
239 /**
240 * General category "Nd" in the Unicode specification.
241 * @since 1.1
242 */
243 public static final byte DECIMAL_DIGIT_NUMBER = 9;
244
245 /**
246 * General category "Nl" in the Unicode specification.
247 * @since 1.1
248 */
249 public static final byte LETTER_NUMBER = 10;
250
251 /**
252 * General category "No" in the Unicode specification.
253 * @since 1.1
254 */
255 public static final byte OTHER_NUMBER = 11;
256
257 /**
258 * General category "Zs" in the Unicode specification.
259 * @since 1.1
260 */
261 public static final byte SPACE_SEPARATOR = 12;
262
263 /**
264 * General category "Zl" in the Unicode specification.
265 * @since 1.1
266 */
267 public static final byte LINE_SEPARATOR = 13;
268
269 /**
270 * General category "Zp" in the Unicode specification.
271 * @since 1.1
272 */
273 public static final byte PARAGRAPH_SEPARATOR = 14;
274
275 /**
276 * General category "Cc" in the Unicode specification.
277 * @since 1.1
278 */
279 public static final byte CONTROL = 15;
280
281 /**
282 * General category "Cf" in the Unicode specification.
283 * @since 1.1
284 */
285 public static final byte FORMAT = 16;
286
// NOTE(review): FORMAT above is 16 and this constant is 18; no general-category
// constant in this part of the file uses the value 17. Presumably the gap is
// intentional -- confirm before "fixing" it. These are public compile-time
// constants, so compilers inline their values into client classes and
// renumbering would silently break already-compiled callers.
/**
 * General category "Co" in the Unicode specification.
 * @since 1.1
 */
public static final byte PRIVATE_USE = 18;
292
293 /**
294 * General category "Cs" in the Unicode specification.
295 * @since 1.1
296 */
297 public static final byte SURROGATE = 19;
298
299 /**
300 * General category "Pd" in the Unicode specification.
301 * @since 1.1
302 */
303 public static final byte DASH_PUNCTUATION = 20;
304
305 /**
306 * General category "Ps" in the Unicode specification.
307 * @since 1.1
308 */
309 public static final byte START_PUNCTUATION = 21;
310
311 /**
312 * General category "Pe" in the Unicode specification.
313 * @since 1.1
314 */
315 public static final byte END_PUNCTUATION = 22;
316
317 /**
318 * General category "Pc" in the Unicode specification.
319 * @since 1.1
320 */
321 public static final byte CONNECTOR_PUNCTUATION = 23;
322
323 /**
324 * General category "Po" in the Unicode specification.
325 * @since 1.1
326 */
327 public static final byte OTHER_PUNCTUATION = 24;
328
329 /**
330 * General category "Sm" in the Unicode specification.
331 * @since 1.1
332 */
333 public static final byte MATH_SYMBOL = 25;
334
335 /**
336 * General category "Sc" in the Unicode specification.
337 * @since 1.1
338 */
339 public static final byte CURRENCY_SYMBOL = 26;
340
341 /**
342 * General category "Sk" in the Unicode specification.
343 * @since 1.1
344 */
345 public static final byte MODIFIER_SYMBOL = 27;
346
347 /**
348 * General category "So" in the Unicode specification.
349 * @since 1.1
350 */
351 public static final byte OTHER_SYMBOL = 28;
352
353 /**
354 * General category "Pi" in the Unicode specification.
355 * @since 1.4
356 */
357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
358
359 /**
360 * General category "Pf" in the Unicode specification.
361 * @since 1.4
362 */
363 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
364
365 /**
366 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
367 */
368 static final int ERROR = 0xFFFFFFFF;
369
370
371 /**
372 * Undefined bidirectional character type. Undefined {@code char}
373 * values have undefined directionality in the Unicode specification.
374 * @since 1.4
375 */
376 public static final byte DIRECTIONALITY_UNDEFINED = -1;
377
378 /**
379 * Strong bidirectional character type "L" in the Unicode specification.
380 * @since 1.4
381 */
382 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
383
384 /**
385 * Strong bidirectional character type "R" in the Unicode specification.
386 * @since 1.4
387 */
388 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
389
390 /**
391 * Strong bidirectional character type "AL" in the Unicode specification.
392 * @since 1.4
393 */
394 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
395
396 /**
397 * Weak bidirectional character type "EN" in the Unicode specification.
398 * @since 1.4
399 */
400 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
401
402 /**
403 * Weak bidirectional character type "ES" in the Unicode specification.
404 * @since 1.4
405 */
406 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
407
408 /**
409 * Weak bidirectional character type "ET" in the Unicode specification.
410 * @since 1.4
411 */
412 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
413
414 /**
415 * Weak bidirectional character type "AN" in the Unicode specification.
416 * @since 1.4
417 */
418 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
419
420 /**
421 * Weak bidirectional character type "CS" in the Unicode specification.
422 * @since 1.4
423 */
424 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
425
426 /**
427 * Weak bidirectional character type "NSM" in the Unicode specification.
428 * @since 1.4
429 */
430 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
431
432 /**
433 * Weak bidirectional character type "BN" in the Unicode specification.
434 * @since 1.4
435 */
436 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
437
438 /**
439 * Neutral bidirectional character type "B" in the Unicode specification.
440 * @since 1.4
441 */
442 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
443
444 /**
445 * Neutral bidirectional character type "S" in the Unicode specification.
446 * @since 1.4
447 */
448 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
449
450 /**
451 * Neutral bidirectional character type "WS" in the Unicode specification.
452 * @since 1.4
453 */
454 public static final byte DIRECTIONALITY_WHITESPACE = 12;
455
456 /**
457 * Neutral bidirectional character type "ON" in the Unicode specification.
458 * @since 1.4
459 */
460 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
461
462 /**
463 * Strong bidirectional character type "LRE" in the Unicode specification.
464 * @since 1.4
465 */
466 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
467
468 /**
469 * Strong bidirectional character type "LRO" in the Unicode specification.
470 * @since 1.4
471 */
472 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
473
474 /**
475 * Strong bidirectional character type "RLE" in the Unicode specification.
476 * @since 1.4
477 */
478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
479
480 /**
481 * Strong bidirectional character type "RLO" in the Unicode specification.
482 * @since 1.4
483 */
484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
485
486 /**
487 * Weak bidirectional character type "PDF" in the Unicode specification.
488 * @since 1.4
489 */
490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
491
492 /**
493 * The minimum value of a
494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
495 * Unicode high-surrogate code unit</a>
496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
497 * A high-surrogate is also known as a <i>leading-surrogate</i>.
498 *
499 * @since 1.5
500 */
501 public static final char MIN_HIGH_SURROGATE = '\uD800';
502
503 /**
504 * The maximum value of a
505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
506 * Unicode high-surrogate code unit</a>
507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
508 * A high-surrogate is also known as a <i>leading-surrogate</i>.
509 *
510 * @since 1.5
511 */
512 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
513
514 /**
515 * The minimum value of a
516 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
517 * Unicode low-surrogate code unit</a>
518 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
519 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
520 *
521 * @since 1.5
522 */
523 public static final char MIN_LOW_SURROGATE = '\uDC00';
524
525 /**
526 * The maximum value of a
527 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
528 * Unicode low-surrogate code unit</a>
529 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
530 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
531 *
532 * @since 1.5
533 */
534 public static final char MAX_LOW_SURROGATE = '\uDFFF';
535
536 /**
537 * The minimum value of a Unicode surrogate code unit in the
538 * UTF-16 encoding, constant {@code '\u005CuD800'}.
539 *
540 * @since 1.5
541 */
542 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
543
544 /**
545 * The maximum value of a Unicode surrogate code unit in the
546 * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
547 *
548 * @since 1.5
549 */
550 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
551
552 /**
553 * The minimum value of a
554 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
555 * Unicode supplementary code point</a>, constant {@code U+10000}.
556 *
557 * @since 1.5
558 */
559 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
560
561 /**
562 * The minimum value of a
563 * <a href="http://www.unicode.org/glossary/#code_point">
564 * Unicode code point</a>, constant {@code U+0000}.
565 *
566 * @since 1.5
567 */
568 public static final int MIN_CODE_POINT = 0x000000;
569
570 /**
571 * The maximum value of a
572 * <a href="http://www.unicode.org/glossary/#code_point">
573 * Unicode code point</a>, constant {@code U+10FFFF}.
574 *
575 * @since 1.5
576 */
577 public static final int MAX_CODE_POINT = 0X10FFFF;
578
579
580 /**
581 * Instances of this class represent particular subsets of the Unicode
582 * character set. The only family of subsets defined in the
583 * {@code Character} class is {@link Character.UnicodeBlock}.
584 * Other portions of the Java API may define other subsets for their
585 * own purposes.
586 *
587 * @since 1.2
588 */
589 public static class Subset {
590
591 private String name;
592
593 /**
594 * Constructs a new {@code Subset} instance.
595 *
596 * @param name The name of this subset
597 * @exception NullPointerException if name is {@code null}
598 */
599 protected Subset(String name) {
600 if (name == null) {
601 throw new NullPointerException("name");
602 }
603 this.name = name;
604 }
605
606 /**
607 * Compares two {@code Subset} objects for equality.
608 * This method returns {@code true} if and only if
609 * {@code this} and the argument refer to the same
610 * object; since this method is {@code final}, this
611 * guarantee holds for all subclasses.
612 */
613 public final boolean equals(Object obj) {
614 return (this == obj);
615 }
616
617 /**
618 * Returns the standard hash code as defined by the
619 * {@link Object#hashCode} method. This method
620 * is {@code final} in order to ensure that the
621 * {@code equals} and {@code hashCode} methods will
622 * be consistent in all subclasses.
623 */
624 public final int hashCode() {
625 return super.hashCode();
626 }
627
628 /**
629 * Returns the name of this subset.
630 */
631 public final String toString() {
632 return name;
633 }
634 }
635
636 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
637 // for the latest specification of Unicode Blocks.
638
639 /**
640 * A family of character subsets representing the character blocks in the
641 * Unicode specification. Character blocks generally define characters
642 * used for a specific script or purpose. A character is contained by
643 * at most one Unicode block.
644 *
645 * @since 1.2
646 */
647 public static final class UnicodeBlock extends Subset {
648
649 private static Map<String, UnicodeBlock> map = new HashMap<>(256);
650
/**
 * Creates a UnicodeBlock with the given identifier name.
 * This name must be the same as the block identifier.
 * <p>
 * The name is passed to the {@code Subset} constructor (which rejects
 * {@code null}) and the new instance is then stored in {@code map}
 * under that same name.
 *
 * @param idName the identifier name of the block
 */
private UnicodeBlock(String idName) {
super(idName);
// Register the instance under its identifier name.
map.put(idName, this);
}
659
/**
 * Creates a UnicodeBlock with the given identifier name and
 * alias name.
 * <p>
 * Delegates to the single-name constructor and then stores the same
 * instance in {@code map} under {@code alias} as well. A call that
 * passes exactly one alias resolves to this overload rather than to
 * the varargs constructor.
 *
 * @param idName the identifier name of the block
 * @param alias  an additional name mapped to this block
 */
private UnicodeBlock(String idName, String alias) {
this(idName);
// Second key for the same block object.
map.put(alias, this);
}
668
/**
 * Creates a UnicodeBlock that is reachable under its identifier name
 * and under each of the supplied alias names.
 * <p>
 * The identifier is registered by the single-name constructor; every
 * alias is then added to {@code map} as a further key for this instance.
 *
 * @param idName  the identifier name of the block
 * @param aliases the additional names mapped to this block
 */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);
    for (int i = 0; i < aliases.length; i++) {
        map.put(aliases[i], this);
    }
}
678
679 /**
680 * Constant for the "Basic Latin" Unicode character block.
681 * @since 1.2
682 */
683 public static final UnicodeBlock BASIC_LATIN =
684 new UnicodeBlock("BASIC_LATIN",
685 "BASIC LATIN",
686 "BASICLATIN");
687
688 /**
689 * Constant for the "Latin-1 Supplement" Unicode character block.
690 * @since 1.2
691 */
692 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
693 new UnicodeBlock("LATIN_1_SUPPLEMENT",
694 "LATIN-1 SUPPLEMENT",
695 "LATIN-1SUPPLEMENT");
696
697 /**
698 * Constant for the "Latin Extended-A" Unicode character block.
699 * @since 1.2
700 */
701 public static final UnicodeBlock LATIN_EXTENDED_A =
702 new UnicodeBlock("LATIN_EXTENDED_A",
703 "LATIN EXTENDED-A",
704 "LATINEXTENDED-A");
705
706 /**
707 * Constant for the "Latin Extended-B" Unicode character block.
708 * @since 1.2
709 */
710 public static final UnicodeBlock LATIN_EXTENDED_B =
711 new UnicodeBlock("LATIN_EXTENDED_B",
712 "LATIN EXTENDED-B",
713 "LATINEXTENDED-B");
714
715 /**
716 * Constant for the "IPA Extensions" Unicode character block.
717 * @since 1.2
718 */
719 public static final UnicodeBlock IPA_EXTENSIONS =
720 new UnicodeBlock("IPA_EXTENSIONS",
721 "IPA EXTENSIONS",
722 "IPAEXTENSIONS");
723
724 /**
725 * Constant for the "Spacing Modifier Letters" Unicode character block.
726 * @since 1.2
727 */
728 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
729 new UnicodeBlock("SPACING_MODIFIER_LETTERS",
730 "SPACING MODIFIER LETTERS",
731 "SPACINGMODIFIERLETTERS");
732
733 /**
734 * Constant for the "Combining Diacritical Marks" Unicode character block.
735 * @since 1.2
736 */
737 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
738 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
739 "COMBINING DIACRITICAL MARKS",
740 "COMBININGDIACRITICALMARKS");
741
742 /**
743 * Constant for the "Greek and Coptic" Unicode character block.
744 * <p>
745 * This block was previously known as the "Greek" block.
746 *
747 * @since 1.2
748 */
749 public static final UnicodeBlock GREEK =
750 new UnicodeBlock("GREEK",
751 "GREEK AND COPTIC",
752 "GREEKANDCOPTIC");
753
754 /**
755 * Constant for the "Cyrillic" Unicode character block.
756 * @since 1.2
757 */
758 public static final UnicodeBlock CYRILLIC =
759 new UnicodeBlock("CYRILLIC");
760
761 /**
762 * Constant for the "Armenian" Unicode character block.
763 * @since 1.2
764 */
765 public static final UnicodeBlock ARMENIAN =
766 new UnicodeBlock("ARMENIAN");
767
768 /**
769 * Constant for the "Hebrew" Unicode character block.
770 * @since 1.2
771 */
772 public static final UnicodeBlock HEBREW =
773 new UnicodeBlock("HEBREW");
774
775 /**
776 * Constant for the "Arabic" Unicode character block.
777 * @since 1.2
778 */
779 public static final UnicodeBlock ARABIC =
780 new UnicodeBlock("ARABIC");
781
782 /**
783 * Constant for the "Devanagari" Unicode character block.
784 * @since 1.2
785 */
786 public static final UnicodeBlock DEVANAGARI =
787 new UnicodeBlock("DEVANAGARI");
788
789 /**
790 * Constant for the "Bengali" Unicode character block.
791 * @since 1.2
792 */
793 public static final UnicodeBlock BENGALI =
794 new UnicodeBlock("BENGALI");
795
796 /**
797 * Constant for the "Gurmukhi" Unicode character block.
798 * @since 1.2
799 */
800 public static final UnicodeBlock GURMUKHI =
801 new UnicodeBlock("GURMUKHI");
802
803 /**
804 * Constant for the "Gujarati" Unicode character block.
805 * @since 1.2
806 */
807 public static final UnicodeBlock GUJARATI =
808 new UnicodeBlock("GUJARATI");
809
810 /**
811 * Constant for the "Oriya" Unicode character block.
812 * @since 1.2
813 */
814 public static final UnicodeBlock ORIYA =
815 new UnicodeBlock("ORIYA");
816
817 /**
818 * Constant for the "Tamil" Unicode character block.
819 * @since 1.2
820 */
821 public static final UnicodeBlock TAMIL =
822 new UnicodeBlock("TAMIL");
823
824 /**
825 * Constant for the "Telugu" Unicode character block.
826 * @since 1.2
827 */
828 public static final UnicodeBlock TELUGU =
829 new UnicodeBlock("TELUGU");
830
831 /**
832 * Constant for the "Kannada" Unicode character block.
833 * @since 1.2
834 */
835 public static final UnicodeBlock KANNADA =
836 new UnicodeBlock("KANNADA");
837
838 /**
839 * Constant for the "Malayalam" Unicode character block.
840 * @since 1.2
841 */
842 public static final UnicodeBlock MALAYALAM =
843 new UnicodeBlock("MALAYALAM");
844
845 /**
846 * Constant for the "Thai" Unicode character block.
847 * @since 1.2
848 */
849 public static final UnicodeBlock THAI =
850 new UnicodeBlock("THAI");
851
852 /**
853 * Constant for the "Lao" Unicode character block.
854 * @since 1.2
855 */
856 public static final UnicodeBlock LAO =
857 new UnicodeBlock("LAO");
858
859 /**
860 * Constant for the "Tibetan" Unicode character block.
861 * @since 1.2
862 */
863 public static final UnicodeBlock TIBETAN =
864 new UnicodeBlock("TIBETAN");
865
866 /**
867 * Constant for the "Georgian" Unicode character block.
868 * @since 1.2
869 */
870 public static final UnicodeBlock GEORGIAN =
871 new UnicodeBlock("GEORGIAN");
872
873 /**
874 * Constant for the "Hangul Jamo" Unicode character block.
875 * @since 1.2
876 */
877 public static final UnicodeBlock HANGUL_JAMO =
878 new UnicodeBlock("HANGUL_JAMO",
879 "HANGUL JAMO",
880 "HANGULJAMO");
881
882 /**
883 * Constant for the "Latin Extended Additional" Unicode character block.
884 * @since 1.2
885 */
886 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
887 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
888 "LATIN EXTENDED ADDITIONAL",
889 "LATINEXTENDEDADDITIONAL");
890
891 /**
892 * Constant for the "Greek Extended" Unicode character block.
893 * @since 1.2
894 */
895 public static final UnicodeBlock GREEK_EXTENDED =
896 new UnicodeBlock("GREEK_EXTENDED",
897 "GREEK EXTENDED",
898 "GREEKEXTENDED");
899
900 /**
901 * Constant for the "General Punctuation" Unicode character block.
902 * @since 1.2
903 */
904 public static final UnicodeBlock GENERAL_PUNCTUATION =
905 new UnicodeBlock("GENERAL_PUNCTUATION",
906 "GENERAL PUNCTUATION",
907 "GENERALPUNCTUATION");
908
909 /**
910 * Constant for the "Superscripts and Subscripts" Unicode character
911 * block.
912 * @since 1.2
913 */
914 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
915 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
916 "SUPERSCRIPTS AND SUBSCRIPTS",
917 "SUPERSCRIPTSANDSUBSCRIPTS");
918
919 /**
920 * Constant for the "Currency Symbols" Unicode character block.
921 * @since 1.2
922 */
923 public static final UnicodeBlock CURRENCY_SYMBOLS =
924 new UnicodeBlock("CURRENCY_SYMBOLS",
925 "CURRENCY SYMBOLS",
926 "CURRENCYSYMBOLS");
927
928 /**
929 * Constant for the "Combining Diacritical Marks for Symbols" Unicode
930 * character block.
931 * <p>
932 * This block was previously known as "Combining Marks for Symbols".
933 * @since 1.2
934 */
935 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
936 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
937 "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
938 "COMBININGDIACRITICALMARKSFORSYMBOLS",
939 "COMBINING MARKS FOR SYMBOLS",
940 "COMBININGMARKSFORSYMBOLS");
941
942 /**
943 * Constant for the "Letterlike Symbols" Unicode character block.
944 * @since 1.2
945 */
946 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
947 new UnicodeBlock("LETTERLIKE_SYMBOLS",
948 "LETTERLIKE SYMBOLS",
949 "LETTERLIKESYMBOLS");
950
951 /**
952 * Constant for the "Number Forms" Unicode character block.
953 * @since 1.2
954 */
955 public static final UnicodeBlock NUMBER_FORMS =
956 new UnicodeBlock("NUMBER_FORMS",
957 "NUMBER FORMS",
958 "NUMBERFORMS");
959
960 /**
961 * Constant for the "Arrows" Unicode character block.
962 * @since 1.2
963 */
964 public static final UnicodeBlock ARROWS =
965 new UnicodeBlock("ARROWS");
966
967 /**
968 * Constant for the "Mathematical Operators" Unicode character block.
969 * @since 1.2
970 */
971 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
972 new UnicodeBlock("MATHEMATICAL_OPERATORS",
973 "MATHEMATICAL OPERATORS",
974 "MATHEMATICALOPERATORS");
975
976 /**
977 * Constant for the "Miscellaneous Technical" Unicode character block.
978 * @since 1.2
979 */
980 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
981 new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
982 "MISCELLANEOUS TECHNICAL",
983 "MISCELLANEOUSTECHNICAL");
984
985 /**
986 * Constant for the "Control Pictures" Unicode character block.
987 * @since 1.2
988 */
989 public static final UnicodeBlock CONTROL_PICTURES =
990 new UnicodeBlock("CONTROL_PICTURES",
991 "CONTROL PICTURES",
992 "CONTROLPICTURES");
993
994 /**
995 * Constant for the "Optical Character Recognition" Unicode character block.
996 * @since 1.2
997 */
998 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
999 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1000 "OPTICAL CHARACTER RECOGNITION",
1001 "OPTICALCHARACTERRECOGNITION");
1002
1003 /**
1004 * Constant for the "Enclosed Alphanumerics" Unicode character block.
1005 * @since 1.2
1006 */
1007 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1008 new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1009 "ENCLOSED ALPHANUMERICS",
1010 "ENCLOSEDALPHANUMERICS");
1011
1012 /**
1013 * Constant for the "Box Drawing" Unicode character block.
1014 * @since 1.2
1015 */
1016 public static final UnicodeBlock BOX_DRAWING =
1017 new UnicodeBlock("BOX_DRAWING",
1018 "BOX DRAWING",
1019 "BOXDRAWING");
1020
1021 /**
1022 * Constant for the "Block Elements" Unicode character block.
1023 * @since 1.2
1024 */
1025 public static final UnicodeBlock BLOCK_ELEMENTS =
1026 new UnicodeBlock("BLOCK_ELEMENTS",
1027 "BLOCK ELEMENTS",
1028 "BLOCKELEMENTS");
1029
1030 /**
1031 * Constant for the "Geometric Shapes" Unicode character block.
1032 * @since 1.2
1033 */
1034 public static final UnicodeBlock GEOMETRIC_SHAPES =
1035 new UnicodeBlock("GEOMETRIC_SHAPES",
1036 "GEOMETRIC SHAPES",
1037 "GEOMETRICSHAPES");
1038
1039 /**
1040 * Constant for the "Miscellaneous Symbols" Unicode character block.
1041 * @since 1.2
1042 */
1043 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1044 new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1045 "MISCELLANEOUS SYMBOLS",
1046 "MISCELLANEOUSSYMBOLS");
1047
1048 /**
1049 * Constant for the "Dingbats" Unicode character block.
1050 * @since 1.2
1051 */
1052 public static final UnicodeBlock DINGBATS =
1053 new UnicodeBlock("DINGBATS");
1054
1055 /**
1056 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1057 * @since 1.2
1058 */
1059 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1060 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1061 "CJK SYMBOLS AND PUNCTUATION",
1062 "CJKSYMBOLSANDPUNCTUATION");
1063
1064 /**
1065 * Constant for the "Hiragana" Unicode character block.
1066 * @since 1.2
1067 */
1068 public static final UnicodeBlock HIRAGANA =
1069 new UnicodeBlock("HIRAGANA");
1070
1071 /**
1072 * Constant for the "Katakana" Unicode character block.
1073 * @since 1.2
1074 */
1075 public static final UnicodeBlock KATAKANA =
1076 new UnicodeBlock("KATAKANA");
1077
1078 /**
1079 * Constant for the "Bopomofo" Unicode character block.
1080 * @since 1.2
1081 */
1082 public static final UnicodeBlock BOPOMOFO =
1083 new UnicodeBlock("BOPOMOFO");
1084
1085 /**
1086 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1087 * @since 1.2
1088 */
1089 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1090 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1091 "HANGUL COMPATIBILITY JAMO",
1092 "HANGULCOMPATIBILITYJAMO");
1093
1094 /**
1095 * Constant for the "Kanbun" Unicode character block.
1096 * @since 1.2
1097 */
1098 public static final UnicodeBlock KANBUN =
1099 new UnicodeBlock("KANBUN");
1100
1101 /**
1102 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1103 * @since 1.2
1104 */
1105 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1106 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1107 "ENCLOSED CJK LETTERS AND MONTHS",
1108 "ENCLOSEDCJKLETTERSANDMONTHS");
1109
1110 /**
1111 * Constant for the "CJK Compatibility" Unicode character block.
1112 * @since 1.2
1113 */
1114 public static final UnicodeBlock CJK_COMPATIBILITY =
1115 new UnicodeBlock("CJK_COMPATIBILITY",
1116 "CJK COMPATIBILITY",
1117 "CJKCOMPATIBILITY");
1118
1119 /**
1120 * Constant for the "CJK Unified Ideographs" Unicode character block.
1121 * @since 1.2
1122 */
1123 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1124 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1125 "CJK UNIFIED IDEOGRAPHS",
1126 "CJKUNIFIEDIDEOGRAPHS");
1127
1128 /**
1129 * Constant for the "Hangul Syllables" Unicode character block.
1130 * @since 1.2
1131 */
1132 public static final UnicodeBlock HANGUL_SYLLABLES =
1133 new UnicodeBlock("HANGUL_SYLLABLES",
1134 "HANGUL SYLLABLES",
1135 "HANGULSYLLABLES");
1136
1137 /**
1138 * Constant for the "Private Use Area" Unicode character block.
1139 * @since 1.2
1140 */
1141 public static final UnicodeBlock PRIVATE_USE_AREA =
1142 new UnicodeBlock("PRIVATE_USE_AREA",
1143 "PRIVATE USE AREA",
1144 "PRIVATEUSEAREA");
1145
1146 /**
1147 * Constant for the "CJK Compatibility Ideographs" Unicode character
1148 * block.
1149 * @since 1.2
1150 */
1151 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1152 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1153 "CJK COMPATIBILITY IDEOGRAPHS",
1154 "CJKCOMPATIBILITYIDEOGRAPHS");
1155
1156 /**
1157 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1158 * @since 1.2
1159 */
1160 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1161 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1162 "ALPHABETIC PRESENTATION FORMS",
1163 "ALPHABETICPRESENTATIONFORMS");
1164
1165 /**
1166 * Constant for the "Arabic Presentation Forms-A" Unicode character
1167 * block.
1168 * @since 1.2
1169 */
1170 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1171 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1172 "ARABIC PRESENTATION FORMS-A",
1173 "ARABICPRESENTATIONFORMS-A");
1174
1175 /**
1176 * Constant for the "Combining Half Marks" Unicode character block.
1177 * @since 1.2
1178 */
1179 public static final UnicodeBlock COMBINING_HALF_MARKS =
1180 new UnicodeBlock("COMBINING_HALF_MARKS",
1181 "COMBINING HALF MARKS",
1182 "COMBININGHALFMARKS");
1183
1184 /**
1185 * Constant for the "CJK Compatibility Forms" Unicode character block.
1186 * @since 1.2
1187 */
1188 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1189 new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1190 "CJK COMPATIBILITY FORMS",
1191 "CJKCOMPATIBILITYFORMS");
1192
1193 /**
1194 * Constant for the "Small Form Variants" Unicode character block.
1195 * @since 1.2
1196 */
1197 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1198 new UnicodeBlock("SMALL_FORM_VARIANTS",
1199 "SMALL FORM VARIANTS",
1200 "SMALLFORMVARIANTS");
1201
1202 /**
1203 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1204 * @since 1.2
1205 */
1206 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1207 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1208 "ARABIC PRESENTATION FORMS-B",
1209 "ARABICPRESENTATIONFORMS-B");
1210
1211 /**
1212 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1213 * block.
1214 * @since 1.2
1215 */
1216 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1217 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1218 "HALFWIDTH AND FULLWIDTH FORMS",
1219 "HALFWIDTHANDFULLWIDTHFORMS");
1220
1221 /**
1222 * Constant for the "Specials" Unicode character block.
1223 * @since 1.2
1224 */
1225 public static final UnicodeBlock SPECIALS =
1226 new UnicodeBlock("SPECIALS");
1227
1228 /**
1229 * Legacy constant for the surrogates area.
1230 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1231 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1232 * These new constants match the block definitions of the Unicode
1233 * Standard. The {@link #of(char)} and {@link #of(int)} methods
1234 * return the new constants, not SURROGATES_AREA.
1235 */
1236 @Deprecated
1237 public static final UnicodeBlock SURROGATES_AREA =
1238 new UnicodeBlock("SURROGATES_AREA");
1239
1240 /**
1241 * Constant for the "Syriac" Unicode character block.
1242 * @since 1.4
1243 */
1244 public static final UnicodeBlock SYRIAC =
1245 new UnicodeBlock("SYRIAC");
1246
1247 /**
1248 * Constant for the "Thaana" Unicode character block.
1249 * @since 1.4
1250 */
1251 public static final UnicodeBlock THAANA =
1252 new UnicodeBlock("THAANA");
1253
1254 /**
1255 * Constant for the "Sinhala" Unicode character block.
1256 * @since 1.4
1257 */
1258 public static final UnicodeBlock SINHALA =
1259 new UnicodeBlock("SINHALA");
1260
1261 /**
1262 * Constant for the "Myanmar" Unicode character block.
1263 * @since 1.4
1264 */
1265 public static final UnicodeBlock MYANMAR =
1266 new UnicodeBlock("MYANMAR");
1267
1268 /**
1269 * Constant for the "Ethiopic" Unicode character block.
1270 * @since 1.4
1271 */
1272 public static final UnicodeBlock ETHIOPIC =
1273 new UnicodeBlock("ETHIOPIC");
1274
1275 /**
1276 * Constant for the "Cherokee" Unicode character block.
1277 * @since 1.4
1278 */
1279 public static final UnicodeBlock CHEROKEE =
1280 new UnicodeBlock("CHEROKEE");
1281
1282 /**
1283 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1284 * @since 1.4
1285 */
1286 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1287 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1288 "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1289 "UNIFIEDCANADIANABORIGINALSYLLABICS");
1290
1291 /**
1292 * Constant for the "Ogham" Unicode character block.
1293 * @since 1.4
1294 */
1295 public static final UnicodeBlock OGHAM =
1296 new UnicodeBlock("OGHAM");
1297
1298 /**
1299 * Constant for the "Runic" Unicode character block.
1300 * @since 1.4
1301 */
1302 public static final UnicodeBlock RUNIC =
1303 new UnicodeBlock("RUNIC");
1304
1305 /**
1306 * Constant for the "Khmer" Unicode character block.
1307 * @since 1.4
1308 */
1309 public static final UnicodeBlock KHMER =
1310 new UnicodeBlock("KHMER");
1311
1312 /**
1313 * Constant for the "Mongolian" Unicode character block.
1314 * @since 1.4
1315 */
1316 public static final UnicodeBlock MONGOLIAN =
1317 new UnicodeBlock("MONGOLIAN");
1318
1319 /**
1320 * Constant for the "Braille Patterns" Unicode character block.
1321 * @since 1.4
1322 */
1323 public static final UnicodeBlock BRAILLE_PATTERNS =
1324 new UnicodeBlock("BRAILLE_PATTERNS",
1325 "BRAILLE PATTERNS",
1326 "BRAILLEPATTERNS");
1327
1328 /**
1329 * Constant for the "CJK Radicals Supplement" Unicode character block.
1330 * @since 1.4
1331 */
1332 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1333 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1334 "CJK RADICALS SUPPLEMENT",
1335 "CJKRADICALSSUPPLEMENT");
1336
1337 /**
1338 * Constant for the "Kangxi Radicals" Unicode character block.
1339 * @since 1.4
1340 */
1341 public static final UnicodeBlock KANGXI_RADICALS =
1342 new UnicodeBlock("KANGXI_RADICALS",
1343 "KANGXI RADICALS",
1344 "KANGXIRADICALS");
1345
1346 /**
1347 * Constant for the "Ideographic Description Characters" Unicode character block.
1348 * @since 1.4
1349 */
1350 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1351 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1352 "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1353 "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1354
1355 /**
1356 * Constant for the "Bopomofo Extended" Unicode character block.
1357 * @since 1.4
1358 */
1359 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1360 new UnicodeBlock("BOPOMOFO_EXTENDED",
1361 "BOPOMOFO EXTENDED",
1362 "BOPOMOFOEXTENDED");
1363
1364 /**
1365 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1366 * @since 1.4
1367 */
1368 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1369 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1370 "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1371 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1372
1373 /**
1374 * Constant for the "Yi Syllables" Unicode character block.
1375 * @since 1.4
1376 */
1377 public static final UnicodeBlock YI_SYLLABLES =
1378 new UnicodeBlock("YI_SYLLABLES",
1379 "YI SYLLABLES",
1380 "YISYLLABLES");
1381
1382 /**
1383 * Constant for the "Yi Radicals" Unicode character block.
1384 * @since 1.4
1385 */
1386 public static final UnicodeBlock YI_RADICALS =
1387 new UnicodeBlock("YI_RADICALS",
1388 "YI RADICALS",
1389 "YIRADICALS");
1390
1391 /**
1392 * Constant for the "Cyrillic Supplementary" Unicode character block.
1393 * @since 1.5
1394 */
1395 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1396 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1397 "CYRILLIC SUPPLEMENTARY",
1398 "CYRILLICSUPPLEMENTARY",
1399 "CYRILLIC SUPPLEMENT", // the "Supplement" spelling is passed as well,
1400 "CYRILLICSUPPLEMENT"); // both with and without the space
1401
1402 /**
1403 * Constant for the "Tagalog" Unicode character block.
1404 * @since 1.5
1405 */
1406 public static final UnicodeBlock TAGALOG =
1407 new UnicodeBlock("TAGALOG");
1408
1409 /**
1410 * Constant for the "Hanunoo" Unicode character block.
1411 * @since 1.5
1412 */
1413 public static final UnicodeBlock HANUNOO =
1414 new UnicodeBlock("HANUNOO");
1415
1416 /**
1417 * Constant for the "Buhid" Unicode character block.
1418 * @since 1.5
1419 */
1420 public static final UnicodeBlock BUHID =
1421 new UnicodeBlock("BUHID");
1422
1423 /**
1424 * Constant for the "Tagbanwa" Unicode character block.
1425 * @since 1.5
1426 */
1427 public static final UnicodeBlock TAGBANWA =
1428 new UnicodeBlock("TAGBANWA");
1429
1430 /**
1431 * Constant for the "Limbu" Unicode character block.
1432 * @since 1.5
1433 */
1434 public static final UnicodeBlock LIMBU =
1435 new UnicodeBlock("LIMBU");
1436
1437 /**
1438 * Constant for the "Tai Le" Unicode character block.
1439 * @since 1.5
1440 */
1441 public static final UnicodeBlock TAI_LE =
1442 new UnicodeBlock("TAI_LE",
1443 "TAI LE",
1444 "TAILE");
1445
1446 /**
1447 * Constant for the "Khmer Symbols" Unicode character block.
1448 * @since 1.5
1449 */
1450 public static final UnicodeBlock KHMER_SYMBOLS =
1451 new UnicodeBlock("KHMER_SYMBOLS",
1452 "KHMER SYMBOLS",
1453 "KHMERSYMBOLS");
1454
1455 /**
1456 * Constant for the "Phonetic Extensions" Unicode character block.
1457 * @since 1.5
1458 */
1459 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1460 new UnicodeBlock("PHONETIC_EXTENSIONS",
1461 "PHONETIC EXTENSIONS",
1462 "PHONETICEXTENSIONS");
1463
1464 /**
1465 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1466 * @since 1.5
1467 */
1468 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1469 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1470 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1471 "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1472
1473 /**
1474 * Constant for the "Supplemental Arrows-A" Unicode character block.
1475 * @since 1.5
1476 */
1477 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1478 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1479 "SUPPLEMENTAL ARROWS-A",
1480 "SUPPLEMENTALARROWS-A");
1481
1482 /**
1483 * Constant for the "Supplemental Arrows-B" Unicode character block.
1484 * @since 1.5
1485 */
1486 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1487 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1488 "SUPPLEMENTAL ARROWS-B",
1489 "SUPPLEMENTALARROWS-B");
1490
1491 /**
1492 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1493 * character block.
1494 * @since 1.5
1495 */
1496 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1497 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1498 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1499 "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1500
1501 /**
1502 * Constant for the "Supplemental Mathematical Operators" Unicode
1503 * character block.
1504 * @since 1.5
1505 */
1506 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1507 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1508 "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1509 "SUPPLEMENTALMATHEMATICALOPERATORS");
1510
1511 /**
1512 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1513 * block.
1514 * @since 1.5
1515 */
1516 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1517 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1518 "MISCELLANEOUS SYMBOLS AND ARROWS",
1519 "MISCELLANEOUSSYMBOLSANDARROWS");
1520
1521 /**
1522 * Constant for the "Katakana Phonetic Extensions" Unicode character
1523 * block.
1524 * @since 1.5
1525 */
1526 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1527 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1528 "KATAKANA PHONETIC EXTENSIONS",
1529 "KATAKANAPHONETICEXTENSIONS");
1530
1531 /**
1532 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1533 * @since 1.5
1534 */
1535 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1536 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1537 "YIJING HEXAGRAM SYMBOLS",
1538 "YIJINGHEXAGRAMSYMBOLS");
1539
1540 /**
1541 * Constant for the "Variation Selectors" Unicode character block.
1542 * @since 1.5
1543 */
1544 public static final UnicodeBlock VARIATION_SELECTORS =
1545 new UnicodeBlock("VARIATION_SELECTORS",
1546 "VARIATION SELECTORS",
1547 "VARIATIONSELECTORS");
1548
1549 /**
1550 * Constant for the "Linear B Syllabary" Unicode character block.
1551 * @since 1.5
1552 */
1553 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1554 new UnicodeBlock("LINEAR_B_SYLLABARY",
1555 "LINEAR B SYLLABARY",
1556 "LINEARBSYLLABARY");
1557
1558 /**
1559 * Constant for the "Linear B Ideograms" Unicode character block.
1560 * @since 1.5
1561 */
1562 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1563 new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1564 "LINEAR B IDEOGRAMS",
1565 "LINEARBIDEOGRAMS");
1566
1567 /**
1568 * Constant for the "Aegean Numbers" Unicode character block.
1569 * @since 1.5
1570 */
1571 public static final UnicodeBlock AEGEAN_NUMBERS =
1572 new UnicodeBlock("AEGEAN_NUMBERS",
1573 "AEGEAN NUMBERS",
1574 "AEGEANNUMBERS");
1575
1576 /**
1577 * Constant for the "Old Italic" Unicode character block.
1578 * @since 1.5
1579 */
1580 public static final UnicodeBlock OLD_ITALIC =
1581 new UnicodeBlock("OLD_ITALIC",
1582 "OLD ITALIC",
1583 "OLDITALIC");
1584
1585 /**
1586 * Constant for the "Gothic" Unicode character block.
1587 * @since 1.5
1588 */
1589 public static final UnicodeBlock GOTHIC =
1590 new UnicodeBlock("GOTHIC");
1591
1592 /**
1593 * Constant for the "Ugaritic" Unicode character block.
1594 * @since 1.5
1595 */
1596 public static final UnicodeBlock UGARITIC =
1597 new UnicodeBlock("UGARITIC");
1598
1599 /**
1600 * Constant for the "Deseret" Unicode character block.
1601 * @since 1.5
1602 */
1603 public static final UnicodeBlock DESERET =
1604 new UnicodeBlock("DESERET");
1605
1606 /**
1607 * Constant for the "Shavian" Unicode character block.
1608 * @since 1.5
1609 */
1610 public static final UnicodeBlock SHAVIAN =
1611 new UnicodeBlock("SHAVIAN");
1612
1613 /**
1614 * Constant for the "Osmanya" Unicode character block.
1615 * @since 1.5
1616 */
1617 public static final UnicodeBlock OSMANYA =
1618 new UnicodeBlock("OSMANYA");
1619
1620 /**
1621 * Constant for the "Cypriot Syllabary" Unicode character block.
1622 * @since 1.5
1623 */
1624 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1625 new UnicodeBlock("CYPRIOT_SYLLABARY",
1626 "CYPRIOT SYLLABARY",
1627 "CYPRIOTSYLLABARY");
1628
1629 /**
1630 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1631 * @since 1.5
1632 */
1633 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1634 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1635 "BYZANTINE MUSICAL SYMBOLS",
1636 "BYZANTINEMUSICALSYMBOLS");
1637
1638 /**
1639 * Constant for the "Musical Symbols" Unicode character block.
1640 * @since 1.5
1641 */
1642 public static final UnicodeBlock MUSICAL_SYMBOLS =
1643 new UnicodeBlock("MUSICAL_SYMBOLS",
1644 "MUSICAL SYMBOLS",
1645 "MUSICALSYMBOLS");
1646
1647 /**
1648 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1649 * @since 1.5
1650 */
1651 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1652 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1653 "TAI XUAN JING SYMBOLS",
1654 "TAIXUANJINGSYMBOLS");
1655
1656 /**
1657 * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1658 * character block.
1659 * @since 1.5
1660 */
1661 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1662 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1663 "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1664 "MATHEMATICALALPHANUMERICSYMBOLS");
1665
1666 /**
1667 * Constant for the "CJK Unified Ideographs Extension B" Unicode
1668 * character block.
1669 * @since 1.5
1670 */
1671 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1672 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1673 "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1674 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1675
1676 /**
1677 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678 * @since 1.5
1679 */
1680 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1681 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1682 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1683 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1684
1685 /**
1686 * Constant for the "Tags" Unicode character block.
1687 * @since 1.5
1688 */
1689 public static final UnicodeBlock TAGS =
1690 new UnicodeBlock("TAGS");
1691
1692 /**
1693 * Constant for the "Variation Selectors Supplement" Unicode character
1694 * block.
1695 * @since 1.5
1696 */
1697 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1698 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1699 "VARIATION SELECTORS SUPPLEMENT",
1700 "VARIATIONSELECTORSSUPPLEMENT");
1701
1702 /**
1703 * Constant for the "Supplementary Private Use Area-A" Unicode character
1704 * block.
1705 * @since 1.5
1706 */
1707 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1708 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1709 "SUPPLEMENTARY PRIVATE USE AREA-A",
1710 "SUPPLEMENTARYPRIVATEUSEAREA-A");
1711
1712 /**
1713 * Constant for the "Supplementary Private Use Area-B" Unicode character
1714 * block.
1715 * @since 1.5
1716 */
1717 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1718 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1719 "SUPPLEMENTARY PRIVATE USE AREA-B",
1720 "SUPPLEMENTARYPRIVATEUSEAREA-B");
1721
1722 /**
1723 * Constant for the "High Surrogates" Unicode character block.
1724 * This block represents code point values in the high surrogate
1725 * range: U+D800 through U+DB7F.
1726 *
1727 * @since 1.5
1728 */
1729 public static final UnicodeBlock HIGH_SURROGATES =
1730 new UnicodeBlock("HIGH_SURROGATES",
1731 "HIGH SURROGATES",
1732 "HIGHSURROGATES");
1733
1734 /**
1735 * Constant for the "High Private Use Surrogates" Unicode character
1736 * block.
1737 * This block represents code point values in the private use high
1738 * surrogate range: U+DB80 through U+DBFF.
1739 *
1740 * @since 1.5
1741 */
1742 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1743 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1744 "HIGH PRIVATE USE SURROGATES",
1745 "HIGHPRIVATEUSESURROGATES");
1746
1747 /**
1748 * Constant for the "Low Surrogates" Unicode character block.
1749 * This block represents code point values in the low surrogate
1750 * range: U+DC00 through U+DFFF.
1751 *
1752 * @since 1.5
1753 */
1754 public static final UnicodeBlock LOW_SURROGATES =
1755 new UnicodeBlock("LOW_SURROGATES",
1756 "LOW SURROGATES",
1757 "LOWSURROGATES");
1758
1759 /**
1760 * Constant for the "Arabic Supplement" Unicode character block.
1761 * @since 1.7
1762 */
1763 public static final UnicodeBlock ARABIC_SUPPLEMENT =
1764 new UnicodeBlock("ARABIC_SUPPLEMENT",
1765 "ARABIC SUPPLEMENT",
1766 "ARABICSUPPLEMENT");
1767
1768 /**
1769 * Constant for the "NKo" Unicode character block.
1770 * @since 1.7
1771 */
1772 public static final UnicodeBlock NKO =
1773 new UnicodeBlock("NKO");
1774
1775 /**
1776 * Constant for the "Samaritan" Unicode character block.
1777 * @since 1.7
1778 */
1779 public static final UnicodeBlock SAMARITAN =
1780 new UnicodeBlock("SAMARITAN");
1781
1782 /**
1783 * Constant for the "Mandaic" Unicode character block.
1784 * @since 1.7
1785 */
1786 public static final UnicodeBlock MANDAIC =
1787 new UnicodeBlock("MANDAIC");
1788
1789 /**
1790 * Constant for the "Ethiopic Supplement" Unicode character block.
1791 * @since 1.7
1792 */
1793 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1794 new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1795 "ETHIOPIC SUPPLEMENT",
1796 "ETHIOPICSUPPLEMENT");
1797
1798 /**
1799 * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1800 * Unicode character block.
1801 * @since 1.7
1802 */
1803 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1804 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1805 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1806 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1807
1808 /**
1809 * Constant for the "New Tai Lue" Unicode character block.
1810 * @since 1.7
1811 */
1812 public static final UnicodeBlock NEW_TAI_LUE =
1813 new UnicodeBlock("NEW_TAI_LUE",
1814 "NEW TAI LUE",
1815 "NEWTAILUE");
1816
1817 /**
1818 * Constant for the "Buginese" Unicode character block.
1819 * @since 1.7
1820 */
1821 public static final UnicodeBlock BUGINESE =
1822 new UnicodeBlock("BUGINESE");
1823
1824 /**
1825 * Constant for the "Tai Tham" Unicode character block.
1826 * @since 1.7
1827 */
1828 public static final UnicodeBlock TAI_THAM =
1829 new UnicodeBlock("TAI_THAM",
1830 "TAI THAM",
1831 "TAITHAM");
1832
1833 /**
1834 * Constant for the "Balinese" Unicode character block.
1835 * @since 1.7
1836 */
1837 public static final UnicodeBlock BALINESE =
1838 new UnicodeBlock("BALINESE");
1839
1840 /**
1841 * Constant for the "Sundanese" Unicode character block.
1842 * @since 1.7
1843 */
1844 public static final UnicodeBlock SUNDANESE =
1845 new UnicodeBlock("SUNDANESE");
1846
1847 /**
1848 * Constant for the "Batak" Unicode character block.
1849 * @since 1.7
1850 */
1851 public static final UnicodeBlock BATAK =
1852 new UnicodeBlock("BATAK");
1853
1854 /**
1855 * Constant for the "Lepcha" Unicode character block.
1856 * @since 1.7
1857 */
1858 public static final UnicodeBlock LEPCHA =
1859 new UnicodeBlock("LEPCHA");
1860
1861 /**
1862 * Constant for the "Ol Chiki" Unicode character block.
1863 * @since 1.7
1864 */
1865 public static final UnicodeBlock OL_CHIKI =
1866 new UnicodeBlock("OL_CHIKI",
1867 "OL CHIKI",
1868 "OLCHIKI");
1869
1870 /**
1871 * Constant for the "Vedic Extensions" Unicode character block.
1872 * @since 1.7
1873 */
1874 public static final UnicodeBlock VEDIC_EXTENSIONS =
1875 new UnicodeBlock("VEDIC_EXTENSIONS",
1876 "VEDIC EXTENSIONS",
1877 "VEDICEXTENSIONS");
1878
1879 /**
1880 * Constant for the "Phonetic Extensions Supplement" Unicode character
1881 * block.
1882 * @since 1.7
1883 */
1884 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1885 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1886 "PHONETIC EXTENSIONS SUPPLEMENT",
1887 "PHONETICEXTENSIONSSUPPLEMENT");
1888
1889 /**
1890 * Constant for the "Combining Diacritical Marks Supplement" Unicode
1891 * character block.
1892 * @since 1.7
1893 */
1894 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1895 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1896 "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1897 "COMBININGDIACRITICALMARKSSUPPLEMENT");
1898
1899 /**
1900 * Constant for the "Glagolitic" Unicode character block.
1901 * @since 1.7
1902 */
1903 public static final UnicodeBlock GLAGOLITIC =
1904 new UnicodeBlock("GLAGOLITIC");
1905
1906 /**
1907 * Constant for the "Latin Extended-C" Unicode character block.
1908 * @since 1.7
1909 */
1910 public static final UnicodeBlock LATIN_EXTENDED_C =
1911 new UnicodeBlock("LATIN_EXTENDED_C",
1912 "LATIN EXTENDED-C",
1913 "LATINEXTENDED-C");
1914
1915 /**
1916 * Constant for the "Coptic" Unicode character block.
1917 * @since 1.7
1918 */
1919 public static final UnicodeBlock COPTIC =
1920 new UnicodeBlock("COPTIC");
1921
1922 /**
1923 * Constant for the "Georgian Supplement" Unicode character block.
1924 * @since 1.7
1925 */
1926 public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1927 new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1928 "GEORGIAN SUPPLEMENT",
1929 "GEORGIANSUPPLEMENT");
1930
1931 /**
1932 * Constant for the "Tifinagh" Unicode character block.
1933 * @since 1.7
1934 */
1935 public static final UnicodeBlock TIFINAGH =
1936 new UnicodeBlock("TIFINAGH");
1937
1938 /**
1939 * Constant for the "Ethiopic Extended" Unicode character block.
1940 * @since 1.7
1941 */
1942 public static final UnicodeBlock ETHIOPIC_EXTENDED =
1943 new UnicodeBlock("ETHIOPIC_EXTENDED",
1944 "ETHIOPIC EXTENDED",
1945 "ETHIOPICEXTENDED");
1946
1947 /**
1948 * Constant for the "Cyrillic Extended-A" Unicode character block.
1949 * @since 1.7
1950 */
1951 public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1952 new UnicodeBlock("CYRILLIC_EXTENDED_A",
1953 "CYRILLIC EXTENDED-A",
1954 "CYRILLICEXTENDED-A");
1955
1956 /**
1957 * Constant for the "Supplemental Punctuation" Unicode character block.
1958 * @since 1.7
1959 */
1960 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1961 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1962 "SUPPLEMENTAL PUNCTUATION",
1963 "SUPPLEMENTALPUNCTUATION");
1964
1965 /**
1966 * Constant for the "CJK Strokes" Unicode character block.
1967 * @since 1.7
1968 */
1969 public static final UnicodeBlock CJK_STROKES =
1970 new UnicodeBlock("CJK_STROKES",
1971 "CJK STROKES",
1972 "CJKSTROKES");
1973
1974 /**
1975 * Constant for the "Lisu" Unicode character block.
1976 * @since 1.7
1977 */
1978 public static final UnicodeBlock LISU =
1979 new UnicodeBlock("LISU");
1980
1981 /**
1982 * Constant for the "Vai" Unicode character block.
1983 * @since 1.7
1984 */
1985 public static final UnicodeBlock VAI =
1986 new UnicodeBlock("VAI");
1987
1988 /**
1989 * Constant for the "Cyrillic Extended-B" Unicode character block.
1990 * @since 1.7
1991 */
1992 public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1993 new UnicodeBlock("CYRILLIC_EXTENDED_B",
1994 "CYRILLIC EXTENDED-B",
1995 "CYRILLICEXTENDED-B");
1996
1997 /**
1998 * Constant for the "Bamum" Unicode character block.
1999 * @since 1.7
2000 */
2001 public static final UnicodeBlock BAMUM =
2002 new UnicodeBlock("BAMUM");
2003
2004 /**
2005 * Constant for the "Modifier Tone Letters" Unicode character block.
2006 * @since 1.7
2007 */
2008 public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2009 new UnicodeBlock("MODIFIER_TONE_LETTERS",
2010 "MODIFIER TONE LETTERS",
2011 "MODIFIERTONELETTERS");
2012
2013 /**
2014 * Constant for the "Latin Extended-D" Unicode character block.
2015 * @since 1.7
2016 */
2017 public static final UnicodeBlock LATIN_EXTENDED_D =
2018 new UnicodeBlock("LATIN_EXTENDED_D",
2019 "LATIN EXTENDED-D",
2020 "LATINEXTENDED-D");
2021
2022 /**
2023 * Constant for the "Syloti Nagri" Unicode character block.
2024 * @since 1.7
2025 */
2026 public static final UnicodeBlock SYLOTI_NAGRI =
2027 new UnicodeBlock("SYLOTI_NAGRI",
2028 "SYLOTI NAGRI",
2029 "SYLOTINAGRI");
2030
2031 /**
2032 * Constant for the "Common Indic Number Forms" Unicode character block.
2033 * @since 1.7
2034 */
2035 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2036 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2037 "COMMON INDIC NUMBER FORMS",
2038 "COMMONINDICNUMBERFORMS");
2039
2040 /**
2041 * Constant for the "Phags-pa" Unicode character block.
2042 * @since 1.7
2043 */
2044 public static final UnicodeBlock PHAGS_PA =
2045 new UnicodeBlock("PHAGS_PA",
2046 "PHAGS-PA"); // one extra name only: presumably because it has no space to drop
2047
2048 /**
2049 * Constant for the "Saurashtra" Unicode character block.
2050 * @since 1.7
2051 */
2052 public static final UnicodeBlock SAURASHTRA =
2053 new UnicodeBlock("SAURASHTRA");
2054
2055 /**
2056 * Constant for the "Devanagari Extended" Unicode character block.
2057 * @since 1.7
2058 */
2059 public static final UnicodeBlock DEVANAGARI_EXTENDED =
2060 new UnicodeBlock("DEVANAGARI_EXTENDED",
2061 "DEVANAGARI EXTENDED",
2062 "DEVANAGARIEXTENDED");
2063
2064 /**
2065 * Constant for the "Kayah Li" Unicode character block.
2066 * @since 1.7
2067 */
2068 public static final UnicodeBlock KAYAH_LI =
2069 new UnicodeBlock("KAYAH_LI",
2070 "KAYAH LI",
2071 "KAYAHLI");
2072
2073 /**
2074 * Constant for the "Rejang" Unicode character block.
2075 * @since 1.7
2076 */
2077 public static final UnicodeBlock REJANG =
2078 new UnicodeBlock("REJANG");
2079
2080 /**
2081 * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2082 * @since 1.7
2083 */
2084 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2085 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2086 "HANGUL JAMO EXTENDED-A",
2087 "HANGULJAMOEXTENDED-A");
2088
2089 /**
2090 * Constant for the "Javanese" Unicode character block.
2091 * @since 1.7
2092 */
2093 public static final UnicodeBlock JAVANESE =
2094 new UnicodeBlock("JAVANESE");
2095
2096 /**
2097 * Constant for the "Cham" Unicode character block.
2098 * @since 1.7
2099 */
2100 public static final UnicodeBlock CHAM =
2101 new UnicodeBlock("CHAM");
2102
2103 /**
2104 * Constant for the "Myanmar Extended-A" Unicode character block.
2105 * @since 1.7
2106 */
2107 public static final UnicodeBlock MYANMAR_EXTENDED_A =
2108 new UnicodeBlock("MYANMAR_EXTENDED_A",
2109 "MYANMAR EXTENDED-A",
2110 "MYANMAREXTENDED-A");
2111
2112 /**
2113 * Constant for the "Tai Viet" Unicode character block.
2114 * @since 1.7
2115 */
2116 public static final UnicodeBlock TAI_VIET =
2117 new UnicodeBlock("TAI_VIET",
2118 "TAI VIET",
2119 "TAIVIET");
2120
2121 /**
2122 * Constant for the "Ethiopic Extended-A" Unicode character block.
2123 * @since 1.7
2124 */
2125 public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2126 new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2127 "ETHIOPIC EXTENDED-A",
2128 "ETHIOPICEXTENDED-A");
2129
2130 /**
2131 * Constant for the "Meetei Mayek" Unicode character block.
2132 * @since 1.7
2133 */
2134 public static final UnicodeBlock MEETEI_MAYEK =
2135 new UnicodeBlock("MEETEI_MAYEK",
2136 "MEETEI MAYEK",
2137 "MEETEIMAYEK");
2138
2139 /**
2140 * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2141 * @since 1.7
2142 */
2143 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2144 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2145 "HANGUL JAMO EXTENDED-B",
2146 "HANGULJAMOEXTENDED-B");
2147
2148 /**
2149 * Constant for the "Vertical Forms" Unicode character block.
2150 * @since 1.7
2151 */
2152 public static final UnicodeBlock VERTICAL_FORMS =
2153 new UnicodeBlock("VERTICAL_FORMS",
2154 "VERTICAL FORMS",
2155 "VERTICALFORMS");
2156
2157 /**
2158 * Constant for the "Ancient Greek Numbers" Unicode character block.
2159 * @since 1.7
2160 */
2161 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2162 new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2163 "ANCIENT GREEK NUMBERS",
2164 "ANCIENTGREEKNUMBERS");
2165
2166 /**
2167 * Constant for the "Ancient Symbols" Unicode character block.
2168 * @since 1.7
2169 */
2170 public static final UnicodeBlock ANCIENT_SYMBOLS =
2171 new UnicodeBlock("ANCIENT_SYMBOLS",
2172 "ANCIENT SYMBOLS",
2173 "ANCIENTSYMBOLS");
2174
2175 /**
2176 * Constant for the "Phaistos Disc" Unicode character block.
2177 * @since 1.7
2178 */
2179 public static final UnicodeBlock PHAISTOS_DISC =
2180 new UnicodeBlock("PHAISTOS_DISC",
2181 "PHAISTOS DISC",
2182 "PHAISTOSDISC");
2183
2184 /**
2185 * Constant for the "Lycian" Unicode character block.
2186 * @since 1.7
2187 */
2188 public static final UnicodeBlock LYCIAN =
2189 new UnicodeBlock("LYCIAN");
2190
2191 /**
2192 * Constant for the "Carian" Unicode character block.
2193 * @since 1.7
2194 */
2195 public static final UnicodeBlock CARIAN =
2196 new UnicodeBlock("CARIAN");
2197
2198 /**
2199 * Constant for the "Old Persian" Unicode character block.
2200 * @since 1.7
2201 */
2202 public static final UnicodeBlock OLD_PERSIAN =
2203 new UnicodeBlock("OLD_PERSIAN",
2204 "OLD PERSIAN",
2205 "OLDPERSIAN");
2206
2207 /**
2208 * Constant for the "Imperial Aramaic" Unicode character block.
2209 * @since 1.7
2210 */
2211 public static final UnicodeBlock IMPERIAL_ARAMAIC =
2212 new UnicodeBlock("IMPERIAL_ARAMAIC",
2213 "IMPERIAL ARAMAIC",
2214 "IMPERIALARAMAIC");
2215
2216 /**
2217 * Constant for the "Phoenician" Unicode character block.
2218 * @since 1.7
2219 */
2220 public static final UnicodeBlock PHOENICIAN =
2221 new UnicodeBlock("PHOENICIAN");
2222
2223 /**
2224 * Constant for the "Lydian" Unicode character block.
2225 * @since 1.7
2226 */
2227 public static final UnicodeBlock LYDIAN =
2228 new UnicodeBlock("LYDIAN");
2229
2230 /**
2231 * Constant for the "Kharoshthi" Unicode character block.
2232 * @since 1.7
2233 */
2234 public static final UnicodeBlock KHAROSHTHI =
2235 new UnicodeBlock("KHAROSHTHI");
2236
2237 /**
2238 * Constant for the "Old South Arabian" Unicode character block.
2239 * @since 1.7
2240 */
2241 public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2242 new UnicodeBlock("OLD_SOUTH_ARABIAN",
2243 "OLD SOUTH ARABIAN",
2244 "OLDSOUTHARABIAN");
2245
2246 /**
2247 * Constant for the "Avestan" Unicode character block.
2248 * @since 1.7
2249 */
2250 public static final UnicodeBlock AVESTAN =
2251 new UnicodeBlock("AVESTAN");
2252
2253 /**
2254 * Constant for the "Inscriptional Parthian" Unicode character block.
2255 * @since 1.7
2256 */
2257 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2258 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2259 "INSCRIPTIONAL PARTHIAN",
2260 "INSCRIPTIONALPARTHIAN");
2261
2262 /**
2263 * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264 * @since 1.7
2265 */
2266 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2267 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2268 "INSCRIPTIONAL PAHLAVI",
2269 "INSCRIPTIONALPAHLAVI");
2270
2271 /**
2272 * Constant for the "Old Turkic" Unicode character block.
2273 * @since 1.7
2274 */
2275 public static final UnicodeBlock OLD_TURKIC =
2276 new UnicodeBlock("OLD_TURKIC",
2277 "OLD TURKIC",
2278 "OLDTURKIC");
2279
2280 /**
2281 * Constant for the "Rumi Numeral Symbols" Unicode character block.
2282 * @since 1.7
2283 */
2284 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2285 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2286 "RUMI NUMERAL SYMBOLS",
2287 "RUMINUMERALSYMBOLS");
2288
2289 /**
2290 * Constant for the "Brahmi" Unicode character block.
2291 * @since 1.7
2292 */
2293 public static final UnicodeBlock BRAHMI =
2294 new UnicodeBlock("BRAHMI");
2295
2296 /**
2297 * Constant for the "Kaithi" Unicode character block.
2298 * @since 1.7
2299 */
2300 public static final UnicodeBlock KAITHI =
2301 new UnicodeBlock("KAITHI");
2302
2303 /**
2304 * Constant for the "Cuneiform" Unicode character block.
2305 * @since 1.7
2306 */
2307 public static final UnicodeBlock CUNEIFORM =
2308 new UnicodeBlock("CUNEIFORM");
2309
2310 /**
2311 * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2312 * character block.
2313 * @since 1.7
2314 */
2315 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2316 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2317 "CUNEIFORM NUMBERS AND PUNCTUATION",
2318 "CUNEIFORMNUMBERSANDPUNCTUATION");
2319
2320 /**
2321 * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2322 * @since 1.7
2323 */
2324 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2325 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2326 "EGYPTIAN HIEROGLYPHS",
2327 "EGYPTIANHIEROGLYPHS");
2328
2329 /**
2330 * Constant for the "Bamum Supplement" Unicode character block.
2331 * @since 1.7
2332 */
2333 public static final UnicodeBlock BAMUM_SUPPLEMENT =
2334 new UnicodeBlock("BAMUM_SUPPLEMENT",
2335 "BAMUM SUPPLEMENT",
2336 "BAMUMSUPPLEMENT");
2337
2338 /**
2339 * Constant for the "Kana Supplement" Unicode character block.
2340 * @since 1.7
2341 */
2342 public static final UnicodeBlock KANA_SUPPLEMENT =
2343 new UnicodeBlock("KANA_SUPPLEMENT",
2344 "KANA SUPPLEMENT",
2345 "KANASUPPLEMENT");
2346
2347 /**
2348 * Constant for the "Ancient Greek Musical Notation" Unicode character
2349 * block.
2350 * @since 1.7
2351 */
2352 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2353 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2354 "ANCIENT GREEK MUSICAL NOTATION",
2355 "ANCIENTGREEKMUSICALNOTATION");
2356
2357 /**
2358 * Constant for the "Counting Rod Numerals" Unicode character block.
2359 * @since 1.7
2360 */
2361 public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2362 new UnicodeBlock("COUNTING_ROD_NUMERALS",
2363 "COUNTING ROD NUMERALS",
2364 "COUNTINGRODNUMERALS");
2365
2366 /**
2367 * Constant for the "Mahjong Tiles" Unicode character block.
2368 * @since 1.7
2369 */
2370 public static final UnicodeBlock MAHJONG_TILES =
2371 new UnicodeBlock("MAHJONG_TILES",
2372 "MAHJONG TILES",
2373 "MAHJONGTILES");
2374
2375 /**
2376 * Constant for the "Domino Tiles" Unicode character block.
2377 * @since 1.7
2378 */
2379 public static final UnicodeBlock DOMINO_TILES =
2380 new UnicodeBlock("DOMINO_TILES",
2381 "DOMINO TILES",
2382 "DOMINOTILES");
2383
2384 /**
2385 * Constant for the "Playing Cards" Unicode character block.
2386 * @since 1.7
2387 */
2388 public static final UnicodeBlock PLAYING_CARDS =
2389 new UnicodeBlock("PLAYING_CARDS",
2390 "PLAYING CARDS",
2391 "PLAYINGCARDS");
2392
2393 /**
2394 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2395 * block.
2396 * @since 1.7
2397 */
2398 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2399 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2400 "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2401 "ENCLOSEDALPHANUMERICSUPPLEMENT");
2402
2403 /**
2404 * Constant for the "Enclosed Ideographic Supplement" Unicode character
2405 * block.
2406 * @since 1.7
2407 */
2408 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2409 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2410 "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2411 "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2412
2413 /**
2414 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2415 * character block.
2416 * @since 1.7
2417 */
2418 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2419 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2420 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2421 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2422
2423 /**
2424 * Constant for the "Emoticons" Unicode character block.
2425 * @since 1.7
2426 */
2427 public static final UnicodeBlock EMOTICONS =
2428 new UnicodeBlock("EMOTICONS");
2429
2430 /**
2431 * Constant for the "Transport And Map Symbols" Unicode character block.
2432 * @since 1.7
2433 */
2434 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2435 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2436 "TRANSPORT AND MAP SYMBOLS",
2437 "TRANSPORTANDMAPSYMBOLS");
2438
2439 /**
2440 * Constant for the "Alchemical Symbols" Unicode character block.
2441 * @since 1.7
2442 */
2443 public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2444 new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2445 "ALCHEMICAL SYMBOLS",
2446 "ALCHEMICALSYMBOLS");
2447
2448 /**
2449 * Constant for the "CJK Unified Ideographs Extension C" Unicode
2450 * character block.
2451 * @since 1.7
2452 */
2453 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2454 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2455 "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2456 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2457
2458 /**
2459 * Constant for the "CJK Unified Ideographs Extension D" Unicode
2460 * character block.
2461 * @since 1.7
2462 */
2463 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2464 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2465 "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2466 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2467
2468 private static final int blockStarts[] = {
2469 0x0000, // 0000..007F; Basic Latin
2470 0x0080, // 0080..00FF; Latin-1 Supplement
2471 0x0100, // 0100..017F; Latin Extended-A
2472 0x0180, // 0180..024F; Latin Extended-B
2473 0x0250, // 0250..02AF; IPA Extensions
2474 0x02B0, // 02B0..02FF; Spacing Modifier Letters
2475 0x0300, // 0300..036F; Combining Diacritical Marks
2476 0x0370, // 0370..03FF; Greek and Coptic
2477 0x0400, // 0400..04FF; Cyrillic
2478 0x0500, // 0500..052F; Cyrillic Supplement
2479 0x0530, // 0530..058F; Armenian
2480 0x0590, // 0590..05FF; Hebrew
2481 0x0600, // 0600..06FF; Arabic
2482 0x0700, // 0700..074F; Syriac
2483 0x0750, // 0750..077F; Arabic Supplement
2484 0x0780, // 0780..07BF; Thaana
2485 0x07C0, // 07C0..07FF; NKo
2486 0x0800, // 0800..083F; Samaritan
2487 0x0840, // 0840..085F; Mandaic
2488 0x0860, // unassigned
2489 0x0900, // 0900..097F; Devanagari
2490 0x0980, // 0980..09FF; Bengali
2491 0x0A00, // 0A00..0A7F; Gurmukhi
2492 0x0A80, // 0A80..0AFF; Gujarati
2493 0x0B00, // 0B00..0B7F; Oriya
2494 0x0B80, // 0B80..0BFF; Tamil
2495 0x0C00, // 0C00..0C7F; Telugu
2496 0x0C80, // 0C80..0CFF; Kannada
2497 0x0D00, // 0D00..0D7F; Malayalam
2498 0x0D80, // 0D80..0DFF; Sinhala
2499 0x0E00, // 0E00..0E7F; Thai
2500 0x0E80, // 0E80..0EFF; Lao
2501 0x0F00, // 0F00..0FFF; Tibetan
2502 0x1000, // 1000..109F; Myanmar
2503 0x10A0, // 10A0..10FF; Georgian
2504 0x1100, // 1100..11FF; Hangul Jamo
2505 0x1200, // 1200..137F; Ethiopic
2506 0x1380, // 1380..139F; Ethiopic Supplement
2507 0x13A0, // 13A0..13FF; Cherokee
2508 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
2509 0x1680, // 1680..169F; Ogham
2510 0x16A0, // 16A0..16FF; Runic
2511 0x1700, // 1700..171F; Tagalog
2512 0x1720, // 1720..173F; Hanunoo
2513 0x1740, // 1740..175F; Buhid
2514 0x1760, // 1760..177F; Tagbanwa
2515 0x1780, // 1780..17FF; Khmer
2516 0x1800, // 1800..18AF; Mongolian
2517 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2518 0x1900, // 1900..194F; Limbu
2519 0x1950, // 1950..197F; Tai Le
2520 0x1980, // 1980..19DF; New Tai Lue
2521 0x19E0, // 19E0..19FF; Khmer Symbols
2522 0x1A00, // 1A00..1A1F; Buginese
2523 0x1A20, // 1A20..1AAF; Tai Tham
2524 0x1AB0, // unassigned
2525 0x1B00, // 1B00..1B7F; Balinese
2526 0x1B80, // 1B80..1BBF; Sundanese
2527 0x1BC0, // 1BC0..1BFF; Batak
2528 0x1C00, // 1C00..1C4F; Lepcha
2529 0x1C50, // 1C50..1C7F; Ol Chiki
2530 0x1C80, // unassigned
2531 0x1CD0, // 1CD0..1CFF; Vedic Extensions
2532 0x1D00, // 1D00..1D7F; Phonetic Extensions
2533 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
2534 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
2535 0x1E00, // 1E00..1EFF; Latin Extended Additional
2536 0x1F00, // 1F00..1FFF; Greek Extended
2537 0x2000, // 2000..206F; General Punctuation
2538 0x2070, // 2070..209F; Superscripts and Subscripts
2539 0x20A0, // 20A0..20CF; Currency Symbols
2540 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
2541 0x2100, // 2100..214F; Letterlike Symbols
2542 0x2150, // 2150..218F; Number Forms
2543 0x2190, // 2190..21FF; Arrows
2544 0x2200, // 2200..22FF; Mathematical Operators
2545 0x2300, // 2300..23FF; Miscellaneous Technical
2546 0x2400, // 2400..243F; Control Pictures
2547 0x2440, // 2440..245F; Optical Character Recognition
2548 0x2460, // 2460..24FF; Enclosed Alphanumerics
2549 0x2500, // 2500..257F; Box Drawing
2550 0x2580, // 2580..259F; Block Elements
2551 0x25A0, // 25A0..25FF; Geometric Shapes
2552 0x2600, // 2600..26FF; Miscellaneous Symbols
2553 0x2700, // 2700..27BF; Dingbats
2554 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2555 0x27F0, // 27F0..27FF; Supplemental Arrows-A
2556 0x2800, // 2800..28FF; Braille Patterns
2557 0x2900, // 2900..297F; Supplemental Arrows-B
2558 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
2559 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
2560 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
2561 0x2C00, // 2C00..2C5F; Glagolitic
2562 0x2C60, // 2C60..2C7F; Latin Extended-C
2563 0x2C80, // 2C80..2CFF; Coptic
2564 0x2D00, // 2D00..2D2F; Georgian Supplement
2565 0x2D30, // 2D30..2D7F; Tifinagh
2566 0x2D80, // 2D80..2DDF; Ethiopic Extended
2567 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
2568 0x2E00, // 2E00..2E7F; Supplemental Punctuation
2569 0x2E80, // 2E80..2EFF; CJK Radicals Supplement
2570 0x2F00, // 2F00..2FDF; Kangxi Radicals
2571 0x2FE0, // unassigned
2572 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
2573 0x3000, // 3000..303F; CJK Symbols and Punctuation
2574 0x3040, // 3040..309F; Hiragana
2575 0x30A0, // 30A0..30FF; Katakana
2576 0x3100, // 3100..312F; Bopomofo
2577 0x3130, // 3130..318F; Hangul Compatibility Jamo
2578 0x3190, // 3190..319F; Kanbun
2579 0x31A0, // 31A0..31BF; Bopomofo Extended
2580 0x31C0, // 31C0..31EF; CJK Strokes
2581 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
2582 0x3200, // 3200..32FF; Enclosed CJK Letters and Months
2583 0x3300, // 3300..33FF; CJK Compatibility
2584 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
2585 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
2586 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
2587 0xA000, // A000..A48F; Yi Syllables
2588 0xA490, // A490..A4CF; Yi Radicals
2589 0xA4D0, // A4D0..A4FF; Lisu
2590 0xA500, // A500..A63F; Vai
2591 0xA640, // A640..A69F; Cyrillic Extended-B
2592 0xA6A0, // A6A0..A6FF; Bamum
2593 0xA700, // A700..A71F; Modifier Tone Letters
2594 0xA720, // A720..A7FF; Latin Extended-D
2595 0xA800, // A800..A82F; Syloti Nagri
2596 0xA830, // A830..A83F; Common Indic Number Forms
2597 0xA840, // A840..A87F; Phags-pa
2598 0xA880, // A880..A8DF; Saurashtra
2599 0xA8E0, // A8E0..A8FF; Devanagari Extended
2600 0xA900, // A900..A92F; Kayah Li
2601 0xA930, // A930..A95F; Rejang
2602 0xA960, // A960..A97F; Hangul Jamo Extended-A
2603 0xA980, // A980..A9DF; Javanese
2604 0xA9E0, // unassigned
2605 0xAA00, // AA00..AA5F; Cham
2606 0xAA60, // AA60..AA7F; Myanmar Extended-A
2607 0xAA80, // AA80..AADF; Tai Viet
2608 0xAAE0, // unassigned
2609 0xAB00, // AB00..AB2F; Ethiopic Extended-A
2610 0xAB30, // unassigned
2611 0xABC0, // ABC0..ABFF; Meetei Mayek
2612 0xAC00, // AC00..D7AF; Hangul Syllables
2613 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
2614 0xD800, // D800..DB7F; High Surrogates
2615 0xDB80, // DB80..DBFF; High Private Use Surrogates
2616 0xDC00, // DC00..DFFF; Low Surrogates
2617 0xE000, // E000..F8FF; Private Use Area
2618 0xF900, // F900..FAFF; CJK Compatibility Ideographs
2619 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
2620 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
2621 0xFE00, // FE00..FE0F; Variation Selectors
2622 0xFE10, // FE10..FE1F; Vertical Forms
2623 0xFE20, // FE20..FE2F; Combining Half Marks
2624 0xFE30, // FE30..FE4F; CJK Compatibility Forms
2625 0xFE50, // FE50..FE6F; Small Form Variants
2626 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
2627 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
2628 0xFFF0, // FFF0..FFFF; Specials
2629 0x10000, // 10000..1007F; Linear B Syllabary
2630 0x10080, // 10080..100FF; Linear B Ideograms
2631 0x10100, // 10100..1013F; Aegean Numbers
2632 0x10140, // 10140..1018F; Ancient Greek Numbers
2633 0x10190, // 10190..101CF; Ancient Symbols
2634 0x101D0, // 101D0..101FF; Phaistos Disc
2635 0x10200, // unassigned
2636 0x10280, // 10280..1029F; Lycian
2637 0x102A0, // 102A0..102DF; Carian
2638 0x102E0, // unassigned
2639 0x10300, // 10300..1032F; Old Italic
2640 0x10330, // 10330..1034F; Gothic
2641 0x10350, // unassigned
2642 0x10380, // 10380..1039F; Ugaritic
2643 0x103A0, // 103A0..103DF; Old Persian
2644 0x103E0, // unassigned
2645 0x10400, // 10400..1044F; Deseret
2646 0x10450, // 10450..1047F; Shavian
2647 0x10480, // 10480..104AF; Osmanya
2648 0x104B0, // unassigned
2649 0x10800, // 10800..1083F; Cypriot Syllabary
2650 0x10840, // 10840..1085F; Imperial Aramaic
2651 0x10860, // unassigned
2652 0x10900, // 10900..1091F; Phoenician
2653 0x10920, // 10920..1093F; Lydian
2654 0x10940, // unassigned
2655 0x10A00, // 10A00..10A5F; Kharoshthi
2656 0x10A60, // 10A60..10A7F; Old South Arabian
2657 0x10A80, // unassigned
2658 0x10B00, // 10B00..10B3F; Avestan
2659 0x10B40, // 10B40..10B5F; Inscriptional Parthian
2660 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
2661 0x10B80, // unassigned
2662 0x10C00, // 10C00..10C4F; Old Turkic
2663 0x10C50, // unassigned
2664 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
2665 0x10E80, // unassigned
2666 0x11000, // 11000..1107F; Brahmi
2667 0x11080, // 11080..110CF; Kaithi
2668 0x110D0, // unassigned
2669 0x12000, // 12000..123FF; Cuneiform
2670 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
2671 0x12480, // unassigned
2672 0x13000, // 13000..1342F; Egyptian Hieroglyphs
2673 0x13430, // unassigned
2674 0x16800, // 16800..16A3F; Bamum Supplement
2675 0x16A40, // unassigned
2676 0x1B000, // 1B000..1B0FF; Kana Supplement
2677 0x1B100, // unassigned
2678 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
2679 0x1D100, // 1D100..1D1FF; Musical Symbols
2680 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
2681 0x1D250, // unassigned
2682 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
2683 0x1D360, // 1D360..1D37F; Counting Rod Numerals
2684 0x1D380, // unassigned
2685 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2686 0x1D800, // unassigned
2687 0x1F000, // 1F000..1F02F; Mahjong Tiles
2688 0x1F030, // 1F030..1F09F; Domino Tiles
2689 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
2690 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2691 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
2692 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2693 0x1F600, // 1F600..1F64F; Emoticons
2694 0x1F650, // unassigned
2695 0x1F680, // 1F680..1F6FF; Transport And Map Symbols
2696 0x1F700, // 1F700..1F77F; Alchemical Symbols
2697 0x1F780, // unassigned
2698 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
2699 0x2A6E0, // unassigned
2700 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
2701 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
2702 0x2B820, // unassigned
2703 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2704 0x2FA20, // unassigned
2705 0xE0000, // E0000..E007F; Tags
2706 0xE0080, // unassigned
2707 0xE0100, // E0100..E01EF; Variation Selectors Supplement
2708 0xE01F0, // unassigned
2709 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
2710 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B
2711 };
2712
2713 private static final UnicodeBlock[] blocks = {
2714 BASIC_LATIN,
2715 LATIN_1_SUPPLEMENT,
2716 LATIN_EXTENDED_A,
2717 LATIN_EXTENDED_B,
2718 IPA_EXTENSIONS,
2719 SPACING_MODIFIER_LETTERS,
2720 COMBINING_DIACRITICAL_MARKS,
2721 GREEK,
2722 CYRILLIC,
2723 CYRILLIC_SUPPLEMENTARY,
2724 ARMENIAN,
2725 HEBREW,
2726 ARABIC,
2727 SYRIAC,
2728 ARABIC_SUPPLEMENT,
2729 THAANA,
2730 NKO,
2731 SAMARITAN,
2732 MANDAIC,
2733 null,
2734 DEVANAGARI,
2735 BENGALI,
2736 GURMUKHI,
2737 GUJARATI,
2738 ORIYA,
2739 TAMIL,
2740 TELUGU,
2741 KANNADA,
2742 MALAYALAM,
2743 SINHALA,
2744 THAI,
2745 LAO,
2746 TIBETAN,
2747 MYANMAR,
2748 GEORGIAN,
2749 HANGUL_JAMO,
2750 ETHIOPIC,
2751 ETHIOPIC_SUPPLEMENT,
2752 CHEROKEE,
2753 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2754 OGHAM,
2755 RUNIC,
2756 TAGALOG,
2757 HANUNOO,
2758 BUHID,
2759 TAGBANWA,
2760 KHMER,
2761 MONGOLIAN,
2762 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2763 LIMBU,
2764 TAI_LE,
2765 NEW_TAI_LUE,
2766 KHMER_SYMBOLS,
2767 BUGINESE,
2768 TAI_THAM,
2769 null,
2770 BALINESE,
2771 SUNDANESE,
2772 BATAK,
2773 LEPCHA,
2774 OL_CHIKI,
2775 null,
2776 VEDIC_EXTENSIONS,
2777 PHONETIC_EXTENSIONS,
2778 PHONETIC_EXTENSIONS_SUPPLEMENT,
2779 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2780 LATIN_EXTENDED_ADDITIONAL,
2781 GREEK_EXTENDED,
2782 GENERAL_PUNCTUATION,
2783 SUPERSCRIPTS_AND_SUBSCRIPTS,
2784 CURRENCY_SYMBOLS,
2785 COMBINING_MARKS_FOR_SYMBOLS,
2786 LETTERLIKE_SYMBOLS,
2787 NUMBER_FORMS,
2788 ARROWS,
2789 MATHEMATICAL_OPERATORS,
2790 MISCELLANEOUS_TECHNICAL,
2791 CONTROL_PICTURES,
2792 OPTICAL_CHARACTER_RECOGNITION,
2793 ENCLOSED_ALPHANUMERICS,
2794 BOX_DRAWING,
2795 BLOCK_ELEMENTS,
2796 GEOMETRIC_SHAPES,
2797 MISCELLANEOUS_SYMBOLS,
2798 DINGBATS,
2799 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2800 SUPPLEMENTAL_ARROWS_A,
2801 BRAILLE_PATTERNS,
2802 SUPPLEMENTAL_ARROWS_B,
2803 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2804 SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2805 MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2806 GLAGOLITIC,
2807 LATIN_EXTENDED_C,
2808 COPTIC,
2809 GEORGIAN_SUPPLEMENT,
2810 TIFINAGH,
2811 ETHIOPIC_EXTENDED,
2812 CYRILLIC_EXTENDED_A,
2813 SUPPLEMENTAL_PUNCTUATION,
2814 CJK_RADICALS_SUPPLEMENT,
2815 KANGXI_RADICALS,
2816 null,
2817 IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2818 CJK_SYMBOLS_AND_PUNCTUATION,
2819 HIRAGANA,
2820 KATAKANA,
2821 BOPOMOFO,
2822 HANGUL_COMPATIBILITY_JAMO,
2823 KANBUN,
2824 BOPOMOFO_EXTENDED,
2825 CJK_STROKES,
2826 KATAKANA_PHONETIC_EXTENSIONS,
2827 ENCLOSED_CJK_LETTERS_AND_MONTHS,
2828 CJK_COMPATIBILITY,
2829 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2830 YIJING_HEXAGRAM_SYMBOLS,
2831 CJK_UNIFIED_IDEOGRAPHS,
2832 YI_SYLLABLES,
2833 YI_RADICALS,
2834 LISU,
2835 VAI,
2836 CYRILLIC_EXTENDED_B,
2837 BAMUM,
2838 MODIFIER_TONE_LETTERS,
2839 LATIN_EXTENDED_D,
2840 SYLOTI_NAGRI,
2841 COMMON_INDIC_NUMBER_FORMS,
2842 PHAGS_PA,
2843 SAURASHTRA,
2844 DEVANAGARI_EXTENDED,
2845 KAYAH_LI,
2846 REJANG,
2847 HANGUL_JAMO_EXTENDED_A,
2848 JAVANESE,
2849 null,
2850 CHAM,
2851 MYANMAR_EXTENDED_A,
2852 TAI_VIET,
2853 null,
2854 ETHIOPIC_EXTENDED_A,
2855 null,
2856 MEETEI_MAYEK,
2857 HANGUL_SYLLABLES,
2858 HANGUL_JAMO_EXTENDED_B,
2859 HIGH_SURROGATES,
2860 HIGH_PRIVATE_USE_SURROGATES,
2861 LOW_SURROGATES,
2862 PRIVATE_USE_AREA,
2863 CJK_COMPATIBILITY_IDEOGRAPHS,
2864 ALPHABETIC_PRESENTATION_FORMS,
2865 ARABIC_PRESENTATION_FORMS_A,
2866 VARIATION_SELECTORS,
2867 VERTICAL_FORMS,
2868 COMBINING_HALF_MARKS,
2869 CJK_COMPATIBILITY_FORMS,
2870 SMALL_FORM_VARIANTS,
2871 ARABIC_PRESENTATION_FORMS_B,
2872 HALFWIDTH_AND_FULLWIDTH_FORMS,
2873 SPECIALS,
2874 LINEAR_B_SYLLABARY,
2875 LINEAR_B_IDEOGRAMS,
2876 AEGEAN_NUMBERS,
2877 ANCIENT_GREEK_NUMBERS,
2878 ANCIENT_SYMBOLS,
2879 PHAISTOS_DISC,
2880 null,
2881 LYCIAN,
2882 CARIAN,
2883 null,
2884 OLD_ITALIC,
2885 GOTHIC,
2886 null,
2887 UGARITIC,
2888 OLD_PERSIAN,
2889 null,
2890 DESERET,
2891 SHAVIAN,
2892 OSMANYA,
2893 null,
2894 CYPRIOT_SYLLABARY,
2895 IMPERIAL_ARAMAIC,
2896 null,
2897 PHOENICIAN,
2898 LYDIAN,
2899 null,
2900 KHAROSHTHI,
2901 OLD_SOUTH_ARABIAN,
2902 null,
2903 AVESTAN,
2904 INSCRIPTIONAL_PARTHIAN,
2905 INSCRIPTIONAL_PAHLAVI,
2906 null,
2907 OLD_TURKIC,
2908 null,
2909 RUMI_NUMERAL_SYMBOLS,
2910 null,
2911 BRAHMI,
2912 KAITHI,
2913 null,
2914 CUNEIFORM,
2915 CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2916 null,
2917 EGYPTIAN_HIEROGLYPHS,
2918 null,
2919 BAMUM_SUPPLEMENT,
2920 null,
2921 KANA_SUPPLEMENT,
2922 null,
2923 BYZANTINE_MUSICAL_SYMBOLS,
2924 MUSICAL_SYMBOLS,
2925 ANCIENT_GREEK_MUSICAL_NOTATION,
2926 null,
2927 TAI_XUAN_JING_SYMBOLS,
2928 COUNTING_ROD_NUMERALS,
2929 null,
2930 MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2931 null,
2932 MAHJONG_TILES,
2933 DOMINO_TILES,
2934 PLAYING_CARDS,
2935 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2936 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2937 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2938 EMOTICONS,
2939 null,
2940 TRANSPORT_AND_MAP_SYMBOLS,
2941 ALCHEMICAL_SYMBOLS,
2942 null,
2943 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2944 null,
2945 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2946 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2947 null,
2948 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2949 null,
2950 TAGS,
2951 null,
2952 VARIATION_SELECTORS_SUPPLEMENT,
2953 null,
2954 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2955 SUPPLEMENTARY_PRIVATE_USE_AREA_B
2956 };
2957
2958
2959 /**
2960 * Returns the object representing the Unicode block containing the
2961 * given character, or {@code null} if the character is not a
2962 * member of a defined block.
2963 *
2964 * <p><b>Note:</b> This method cannot handle
2965 * <a href="Character.html#supplementary"> supplementary
2966 * characters</a>. To support all Unicode characters, including
2967 * supplementary characters, use the {@link #of(int)} method.
2968 *
2969 * @param c The character in question
2970 * @return The {@code UnicodeBlock} instance representing the
2971 * Unicode block of which this character is a member, or
2972 * {@code null} if the character is not a member of any
2973 * Unicode block
2974 */
2975 public static UnicodeBlock of(char c) {
2976 return of((int)c);
2977 }
2978
2979 /**
2980 * Returns the object representing the Unicode block
2981 * containing the given character (Unicode code point), or
2982 * {@code null} if the character is not a member of a
2983 * defined block.
2984 *
2985 * @param codePoint the character (Unicode code point) in question.
2986 * @return The {@code UnicodeBlock} instance representing the
2987 * Unicode block of which this character is a member, or
2988 * {@code null} if the character is not a member of any
2989 * Unicode block
2990 * @exception IllegalArgumentException if the specified
2991 * {@code codePoint} is an invalid Unicode code point.
2992 * @see Character#isValidCodePoint(int)
2993 * @since 1.5
2994 */
2995 public static UnicodeBlock of(int codePoint) {
2996 if (!isValidCodePoint(codePoint)) {
2997 throw new IllegalArgumentException();
2998 }
2999
3000 int top, bottom, current;
3001 bottom = 0;
3002 top = blockStarts.length;
3003 current = top/2;
3004
3005 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3006 while (top - bottom > 1) {
3007 if (codePoint >= blockStarts[current]) {
3008 bottom = current;
3009 } else {
3010 top = current;
3011 }
3012 current = (top + bottom) / 2;
3013 }
3014 return blocks[current];
3015 }
3016
3017 /**
3018 * Returns the UnicodeBlock with the given name. Block
3019 * names are determined by The Unicode Standard. The file
3020 * Blocks-<version>.txt defines blocks for a particular
3021 * version of the standard. The {@link Character} class specifies
3022 * the version of the standard that it supports.
3023 * <p>
3024 * This method accepts block names in the following forms:
3025 * <ol>
3026 * <li> Canonical block names as defined by the Unicode Standard.
3027 * For example, the standard defines a "Basic Latin" block. Therefore, this
3028 * method accepts "Basic Latin" as a valid block name. The documentation of
3029 * each UnicodeBlock provides the canonical name.
3030 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3031 * is a valid block name for the "Basic Latin" block.
3032 * <li>The text representation of each constant UnicodeBlock identifier.
3033 * For example, this method will return the {@link #BASIC_LATIN} block if
3034 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3035 * hyphens in the canonical name with underscores.
3036 * </ol>
3037 * Finally, character case is ignored for all of the valid block name forms.
3038 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3039 * The en_US locale's case mapping rules are used to provide case-insensitive
3040 * string comparisons for block name validation.
3041 * <p>
3042 * If the Unicode Standard changes block names, both the previous and
3043 * current names will be accepted.
3044 *
3045 * @param blockName A {@code UnicodeBlock} name.
3046 * @return The {@code UnicodeBlock} instance identified
3047 * by {@code blockName}
3048 * @throws IllegalArgumentException if {@code blockName} is an
3049 * invalid name
3050 * @throws NullPointerException if {@code blockName} is null
3051 * @since 1.5
3052 */
3053 public static final UnicodeBlock forName(String blockName) {
3054 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3055 if (block == null) {
3056 throw new IllegalArgumentException();
3057 }
3058 return block;
3059 }
3060 }
3061
3062
3063 /**
3064 * A family of character subsets representing the character scripts
3065 * defined in the <a href="http://www.unicode.org/reports/tr24/">
3066 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3067 * character is assigned to a single Unicode script, either a specific
3068 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3069 * one of the following three special values,
3070 * {@link Character.UnicodeScript#INHERITED Inherited},
3071 * {@link Character.UnicodeScript#COMMON Common} or
3072 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3073 *
3074 * @since 1.7
3075 */
3076 public static enum UnicodeScript {
3077 /**
3078 * Unicode script "Common".
3079 */
3080 COMMON,
3081
3082 /**
3083 * Unicode script "Latin".
3084 */
3085 LATIN,
3086
3087 /**
3088 * Unicode script "Greek".
3089 */
3090 GREEK,
3091
3092 /**
3093 * Unicode script "Cyrillic".
3094 */
3095 CYRILLIC,
3096
3097 /**
3098 * Unicode script "Armenian".
3099 */
3100 ARMENIAN,
3101
3102 /**
3103 * Unicode script "Hebrew".
3104 */
3105 HEBREW,
3106
3107 /**
3108 * Unicode script "Arabic".
3109 */
3110 ARABIC,
3111
3112 /**
3113 * Unicode script "Syriac".
3114 */
3115 SYRIAC,
3116
3117 /**
3118 * Unicode script "Thaana".
3119 */
3120 THAANA,
3121
3122 /**
3123 * Unicode script "Devanagari".
3124 */
3125 DEVANAGARI,
3126
3127 /**
3128 * Unicode script "Bengali".
3129 */
3130 BENGALI,
3131
3132 /**
3133 * Unicode script "Gurmukhi".
3134 */
3135 GURMUKHI,
3136
3137 /**
3138 * Unicode script "Gujarati".
3139 */
3140 GUJARATI,
3141
3142 /**
3143 * Unicode script "Oriya".
3144 */
3145 ORIYA,
3146
3147 /**
3148 * Unicode script "Tamil".
3149 */
3150 TAMIL,
3151
3152 /**
3153 * Unicode script "Telugu".
3154 */
3155 TELUGU,
3156
3157 /**
3158 * Unicode script "Kannada".
3159 */
3160 KANNADA,
3161
3162 /**
3163 * Unicode script "Malayalam".
3164 */
3165 MALAYALAM,
3166
3167 /**
3168 * Unicode script "Sinhala".
3169 */
3170 SINHALA,
3171
3172 /**
3173 * Unicode script "Thai".
3174 */
3175 THAI,
3176
3177 /**
3178 * Unicode script "Lao".
3179 */
3180 LAO,
3181
3182 /**
3183 * Unicode script "Tibetan".
3184 */
3185 TIBETAN,
3186
3187 /**
3188 * Unicode script "Myanmar".
3189 */
3190 MYANMAR,
3191
3192 /**
3193 * Unicode script "Georgian".
3194 */
3195 GEORGIAN,
3196
3197 /**
3198 * Unicode script "Hangul".
3199 */
3200 HANGUL,
3201
3202 /**
3203 * Unicode script "Ethiopic".
3204 */
3205 ETHIOPIC,
3206
3207 /**
3208 * Unicode script "Cherokee".
3209 */
3210 CHEROKEE,
3211
3212 /**
3213 * Unicode script "Canadian_Aboriginal".
3214 */
3215 CANADIAN_ABORIGINAL,
3216
3217 /**
3218 * Unicode script "Ogham".
3219 */
3220 OGHAM,
3221
3222 /**
3223 * Unicode script "Runic".
3224 */
3225 RUNIC,
3226
3227 /**
3228 * Unicode script "Khmer".
3229 */
3230 KHMER,
3231
3232 /**
3233 * Unicode script "Mongolian".
3234 */
3235 MONGOLIAN,
3236
3237 /**
3238 * Unicode script "Hiragana".
3239 */
3240 HIRAGANA,
3241
3242 /**
3243 * Unicode script "Katakana".
3244 */
3245 KATAKANA,
3246
3247 /**
3248 * Unicode script "Bopomofo".
3249 */
3250 BOPOMOFO,
3251
3252 /**
3253 * Unicode script "Han".
3254 */
3255 HAN,
3256
3257 /**
3258 * Unicode script "Yi".
3259 */
3260 YI,
3261
3262 /**
3263 * Unicode script "Old_Italic".
3264 */
3265 OLD_ITALIC,
3266
3267 /**
3268 * Unicode script "Gothic".
3269 */
3270 GOTHIC,
3271
3272 /**
3273 * Unicode script "Deseret".
3274 */
3275 DESERET,
3276
3277 /**
3278 * Unicode script "Inherited".
3279 */
3280 INHERITED,
3281
3282 /**
3283 * Unicode script "Tagalog".
3284 */
3285 TAGALOG,
3286
3287 /**
3288 * Unicode script "Hanunoo".
3289 */
3290 HANUNOO,
3291
3292 /**
3293 * Unicode script "Buhid".
3294 */
3295 BUHID,
3296
3297 /**
3298 * Unicode script "Tagbanwa".
3299 */
3300 TAGBANWA,
3301
3302 /**
3303 * Unicode script "Limbu".
3304 */
3305 LIMBU,
3306
3307 /**
3308 * Unicode script "Tai_Le".
3309 */
3310 TAI_LE,
3311
3312 /**
3313 * Unicode script "Linear_B".
3314 */
3315 LINEAR_B,
3316
3317 /**
3318 * Unicode script "Ugaritic".
3319 */
3320 UGARITIC,
3321
3322 /**
3323 * Unicode script "Shavian".
3324 */
3325 SHAVIAN,
3326
3327 /**
3328 * Unicode script "Osmanya".
3329 */
3330 OSMANYA,
3331
3332 /**
3333 * Unicode script "Cypriot".
3334 */
3335 CYPRIOT,
3336
3337 /**
3338 * Unicode script "Braille".
3339 */
3340 BRAILLE,
3341
3342 /**
3343 * Unicode script "Buginese".
3344 */
3345 BUGINESE,
3346
3347 /**
3348 * Unicode script "Coptic".
3349 */
3350 COPTIC,
3351
3352 /**
3353 * Unicode script "New_Tai_Lue".
3354 */
3355 NEW_TAI_LUE,
3356
3357 /**
3358 * Unicode script "Glagolitic".
3359 */
3360 GLAGOLITIC,
3361
3362 /**
3363 * Unicode script "Tifinagh".
3364 */
3365 TIFINAGH,
3366
3367 /**
3368 * Unicode script "Syloti_Nagri".
3369 */
3370 SYLOTI_NAGRI,
3371
3372 /**
3373 * Unicode script "Old_Persian".
3374 */
3375 OLD_PERSIAN,
3376
3377 /**
3378 * Unicode script "Kharoshthi".
3379 */
3380 KHAROSHTHI,
3381
3382 /**
3383 * Unicode script "Balinese".
3384 */
3385 BALINESE,
3386
3387 /**
3388 * Unicode script "Cuneiform".
3389 */
3390 CUNEIFORM,
3391
3392 /**
3393 * Unicode script "Phoenician".
3394 */
3395 PHOENICIAN,
3396
3397 /**
3398 * Unicode script "Phags_Pa".
3399 */
3400 PHAGS_PA,
3401
3402 /**
3403 * Unicode script "Nko".
3404 */
3405 NKO,
3406
3407 /**
3408 * Unicode script "Sundanese".
3409 */
3410 SUNDANESE,
3411
3412 /**
3413 * Unicode script "Batak".
3414 */
3415 BATAK,
3416
3417 /**
3418 * Unicode script "Lepcha".
3419 */
3420 LEPCHA,
3421
3422 /**
3423 * Unicode script "Ol_Chiki".
3424 */
3425 OL_CHIKI,
3426
3427 /**
3428 * Unicode script "Vai".
3429 */
3430 VAI,
3431
3432 /**
3433 * Unicode script "Saurashtra".
3434 */
3435 SAURASHTRA,
3436
3437 /**
3438 * Unicode script "Kayah_Li".
3439 */
3440 KAYAH_LI,
3441
3442 /**
3443 * Unicode script "Rejang".
3444 */
3445 REJANG,
3446
3447 /**
3448 * Unicode script "Lycian".
3449 */
3450 LYCIAN,
3451
3452 /**
3453 * Unicode script "Carian".
3454 */
3455 CARIAN,
3456
3457 /**
3458 * Unicode script "Lydian".
3459 */
3460 LYDIAN,
3461
3462 /**
3463 * Unicode script "Cham".
3464 */
3465 CHAM,
3466
3467 /**
3468 * Unicode script "Tai_Tham".
3469 */
3470 TAI_THAM,
3471
3472 /**
3473 * Unicode script "Tai_Viet".
3474 */
3475 TAI_VIET,
3476
3477 /**
3478 * Unicode script "Avestan".
3479 */
3480 AVESTAN,
3481
3482 /**
3483 * Unicode script "Egyptian_Hieroglyphs".
3484 */
3485 EGYPTIAN_HIEROGLYPHS,
3486
3487 /**
3488 * Unicode script "Samaritan".
3489 */
3490 SAMARITAN,
3491
3492 /**
3493 * Unicode script "Mandaic".
3494 */
3495 MANDAIC,
3496
3497 /**
3498 * Unicode script "Lisu".
3499 */
3500 LISU,
3501
3502 /**
3503 * Unicode script "Bamum".
3504 */
3505 BAMUM,
3506
3507 /**
3508 * Unicode script "Javanese".
3509 */
3510 JAVANESE,
3511
3512 /**
3513 * Unicode script "Meetei_Mayek".
3514 */
3515 MEETEI_MAYEK,
3516
3517 /**
3518 * Unicode script "Imperial_Aramaic".
3519 */
3520 IMPERIAL_ARAMAIC,
3521
3522 /**
3523 * Unicode script "Old_South_Arabian".
3524 */
3525 OLD_SOUTH_ARABIAN,
3526
3527 /**
3528 * Unicode script "Inscriptional_Parthian".
3529 */
3530 INSCRIPTIONAL_PARTHIAN,
3531
3532 /**
3533 * Unicode script "Inscriptional_Pahlavi".
3534 */
3535 INSCRIPTIONAL_PAHLAVI,
3536
3537 /**
3538 * Unicode script "Old_Turkic".
3539 */
3540 OLD_TURKIC,
3541
3542 /**
3543 * Unicode script "Brahmi".
3544 */
3545 BRAHMI,
3546
3547 /**
3548 * Unicode script "Kaithi".
3549 */
3550 KAITHI,
3551
3552 /**
3553 * Unicode script "Unknown".
3554 */
3555 UNKNOWN;
3556
3557 private static final int[] scriptStarts = {
3558 0x0000, // 0000..0040; COMMON
3559 0x0041, // 0041..005A; LATIN
3560 0x005B, // 005B..0060; COMMON
3561 0x0061, // 0061..007A; LATIN
3562 0x007B, // 007B..00A9; COMMON
3563 0x00AA, // 00AA..00AA; LATIN
3564 0x00AB, // 00AB..00B9; COMMON
3565 0x00BA, // 00BA..00BA; LATIN
3566 0x00BB, // 00BB..00BF; COMMON
3567 0x00C0, // 00C0..00D6; LATIN
3568 0x00D7, // 00D7..00D7; COMMON
3569 0x00D8, // 00D8..00F6; LATIN
3570 0x00F7, // 00F7..00F7; COMMON
3571 0x00F8, // 00F8..02B8; LATIN
3572 0x02B9, // 02B9..02DF; COMMON
3573 0x02E0, // 02E0..02E4; LATIN
3574 0x02E5, // 02E5..02E9; COMMON
3575 0x02EA, // 02EA..02EB; BOPOMOFO
3576 0x02EC, // 02EC..02FF; COMMON
3577 0x0300, // 0300..036F; INHERITED
3578 0x0370, // 0370..0373; GREEK
3579 0x0374, // 0374..0374; COMMON
3580 0x0375, // 0375..037D; GREEK
3581 0x037E, // 037E..0383; COMMON
3582 0x0384, // 0384..0384; GREEK
3583 0x0385, // 0385..0385; COMMON
3584 0x0386, // 0386..0386; GREEK
3585 0x0387, // 0387..0387; COMMON
3586 0x0388, // 0388..03E1; GREEK
3587 0x03E2, // 03E2..03EF; COPTIC
3588 0x03F0, // 03F0..03FF; GREEK
3589 0x0400, // 0400..0484; CYRILLIC
3590 0x0485, // 0485..0486; INHERITED
3591 0x0487, // 0487..0530; CYRILLIC
3592 0x0531, // 0531..0588; ARMENIAN
3593 0x0589, // 0589..0589; COMMON
3594 0x058A, // 058A..0590; ARMENIAN
3595 0x0591, // 0591..05FF; HEBREW
3596 0x0600, // 0600..060B; ARABIC
3597 0x060C, // 060C..060C; COMMON
3598 0x060D, // 060D..061A; ARABIC
3599 0x061B, // 061B..061D; COMMON
3600 0x061E, // 061E..061E; ARABIC
3601 0x061F, // 061F..061F; COMMON
3602 0x0620, // 0620..063F; ARABIC
3603 0x0640, // 0640..0640; COMMON
3604 0x0641, // 0641..064A; ARABIC
3605 0x064B, // 064B..0655; INHERITED
3606 0x0656, // 0656..065E; ARABIC
3607 0x065F, // 065F..065F; INHERITED
3608 0x0660, // 0660..0669; COMMON
3609 0x066A, // 066A..066F; ARABIC
3610 0x0670, // 0670..0670; INHERITED
3611 0x0671, // 0671..06DC; ARABIC
3612 0x06DD, // 06DD..06DD; COMMON
3613 0x06DE, // 06DE..06FF; ARABIC
3614 0x0700, // 0700..074F; SYRIAC
3615 0x0750, // 0750..077F; ARABIC
3616 0x0780, // 0780..07BF; THAANA
3617 0x07C0, // 07C0..07FF; NKO
3618 0x0800, // 0800..083F; SAMARITAN
3619 0x0840, // 0840..08FF; MANDAIC
3620 0x0900, // 0900..0950; DEVANAGARI
3621 0x0951, // 0951..0952; INHERITED
3622 0x0953, // 0953..0963; DEVANAGARI
3623 0x0964, // 0964..0965; COMMON
3624 0x0966, // 0966..096F; DEVANAGARI
3625 0x0970, // 0970..0970; COMMON
3626 0x0971, // 0971..0980; DEVANAGARI
3627 0x0981, // 0981..0A00; BENGALI
3628 0x0A01, // 0A01..0A80; GURMUKHI
3629 0x0A81, // 0A81..0B00; GUJARATI
3630 0x0B01, // 0B01..0B81; ORIYA
3631 0x0B82, // 0B82..0C00; TAMIL
3632 0x0C01, // 0C01..0C81; TELUGU
3633 0x0C82, // 0C82..0CF0; KANNADA
3634 0x0D02, // 0D02..0D81; MALAYALAM
3635 0x0D82, // 0D82..0E00; SINHALA
3636 0x0E01, // 0E01..0E3E; THAI
3637 0x0E3F, // 0E3F..0E3F; COMMON
3638 0x0E40, // 0E40..0E80; THAI
3639 0x0E81, // 0E81..0EFF; LAO
3640 0x0F00, // 0F00..0FD4; TIBETAN
3641 0x0FD5, // 0FD5..0FD8; COMMON
3642 0x0FD9, // 0FD9..0FFF; TIBETAN
3643 0x1000, // 1000..109F; MYANMAR
3644 0x10A0, // 10A0..10FA; GEORGIAN
3645 0x10FB, // 10FB..10FB; COMMON
3646 0x10FC, // 10FC..10FF; GEORGIAN
3647 0x1100, // 1100..11FF; HANGUL
3648 0x1200, // 1200..139F; ETHIOPIC
3649 0x13A0, // 13A0..13FF; CHEROKEE
3650 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
3651 0x1680, // 1680..169F; OGHAM
3652 0x16A0, // 16A0..16EA; RUNIC
3653 0x16EB, // 16EB..16ED; COMMON
3654 0x16EE, // 16EE..16FF; RUNIC
3655 0x1700, // 1700..171F; TAGALOG
3656 0x1720, // 1720..1734; HANUNOO
3657 0x1735, // 1735..173F; COMMON
3658 0x1740, // 1740..175F; BUHID
3659 0x1760, // 1760..177F; TAGBANWA
3660 0x1780, // 1780..17FF; KHMER
3661 0x1800, // 1800..1801; MONGOLIAN
3662 0x1802, // 1802..1803; COMMON
3663 0x1804, // 1804..1804; MONGOLIAN
3664 0x1805, // 1805..1805; COMMON
3665 0x1806, // 1806..18AF; MONGOLIAN
3666 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL
3667 0x1900, // 1900..194F; LIMBU
3668 0x1950, // 1950..197F; TAI_LE
3669 0x1980, // 1980..19DF; NEW_TAI_LUE
3670 0x19E0, // 19E0..19FF; KHMER
3671 0x1A00, // 1A00..1A1F; BUGINESE
3672 0x1A20, // 1A20..1AFF; TAI_THAM
3673 0x1B00, // 1B00..1B7F; BALINESE
3674 0x1B80, // 1B80..1BBF; SUNDANESE
3675 0x1BC0, // 1BC0..1BFF; BATAK
3676 0x1C00, // 1C00..1C4F; LEPCHA
3677 0x1C50, // 1C50..1CCF; OL_CHIKI
3678 0x1CD0, // 1CD0..1CD2; INHERITED
3679 0x1CD3, // 1CD3..1CD3; COMMON
3680 0x1CD4, // 1CD4..1CE0; INHERITED
3681 0x1CE1, // 1CE1..1CE1; COMMON
3682 0x1CE2, // 1CE2..1CE8; INHERITED
3683 0x1CE9, // 1CE9..1CEC; COMMON
3684 0x1CED, // 1CED..1CED; INHERITED
3685 0x1CEE, // 1CEE..1CFF; COMMON
3686 0x1D00, // 1D00..1D25; LATIN
3687 0x1D26, // 1D26..1D2A; GREEK
3688 0x1D2B, // 1D2B..1D2B; CYRILLIC
3689 0x1D2C, // 1D2C..1D5C; LATIN
3690 0x1D5D, // 1D5D..1D61; GREEK
3691 0x1D62, // 1D62..1D65; LATIN
3692 0x1D66, // 1D66..1D6A; GREEK
3693 0x1D6B, // 1D6B..1D77; LATIN
3694 0x1D78, // 1D78..1D78; CYRILLIC
3695 0x1D79, // 1D79..1DBE; LATIN
3696 0x1DBF, // 1DBF..1DBF; GREEK
3697 0x1DC0, // 1DC0..1DFF; INHERITED
3698 0x1E00, // 1E00..1EFF; LATIN
3699 0x1F00, // 1F00..1FFF; GREEK
3700 0x2000, // 2000..200B; COMMON
3701 0x200C, // 200C..200D; INHERITED
3702 0x200E, // 200E..2070; COMMON
3703 0x2071, // 2071..2073; LATIN
3704 0x2074, // 2074..207E; COMMON
3705 0x207F, // 207F..207F; LATIN
3706 0x2080, // 2080..208F; COMMON
3707 0x2090, // 2090..209F; LATIN
3708 0x20A0, // 20A0..20CF; COMMON
3709 0x20D0, // 20D0..20FF; INHERITED
3710 0x2100, // 2100..2125; COMMON
3711 0x2126, // 2126..2126; GREEK
3712 0x2127, // 2127..2129; COMMON
3713 0x212A, // 212A..212B; LATIN
3714 0x212C, // 212C..2131; COMMON
3715 0x2132, // 2132..2132; LATIN
3716 0x2133, // 2133..214D; COMMON
3717 0x214E, // 214E..214E; LATIN
3718 0x214F, // 214F..215F; COMMON
3719 0x2160, // 2160..2188; LATIN
3720 0x2189, // 2189..27FF; COMMON
3721 0x2800, // 2800..28FF; BRAILLE
3722 0x2900, // 2900..2BFF; COMMON
3723 0x2C00, // 2C00..2C5F; GLAGOLITIC
3724 0x2C60, // 2C60..2C7F; LATIN
3725 0x2C80, // 2C80..2CFF; COPTIC
3726 0x2D00, // 2D00..2D2F; GEORGIAN
3727 0x2D30, // 2D30..2D7F; TIFINAGH
3728 0x2D80, // 2D80..2DDF; ETHIOPIC
3729 0x2DE0, // 2DE0..2DFF; CYRILLIC
3730 0x2E00, // 2E00..2E7F; COMMON
3731 0x2E80, // 2E80..2FEF; HAN
3732 0x2FF0, // 2FF0..3004; COMMON
3733 0x3005, // 3005..3005; HAN
3734 0x3006, // 3006..3006; COMMON
3735 0x3007, // 3007..3007; HAN
3736 0x3008, // 3008..3020; COMMON
3737 0x3021, // 3021..3029; HAN
3738 0x302A, // 302A..302D; INHERITED
3739 0x302E, // 302E..302F; HANGUL
3740 0x3030, // 3030..3037; COMMON
3741 0x3038, // 3038..303B; HAN
3742 0x303C, // 303C..3040; COMMON
3743 0x3041, // 3041..3098; HIRAGANA
3744 0x3099, // 3099..309A; INHERITED
3745 0x309B, // 309B..309C; COMMON
3746 0x309D, // 309D..309F; HIRAGANA
3747 0x30A0, // 30A0..30A0; COMMON
3748 0x30A1, // 30A1..30FA; KATAKANA
3749 0x30FB, // 30FB..30FC; COMMON
3750 0x30FD, // 30FD..3104; KATAKANA
3751 0x3105, // 3105..3130; BOPOMOFO
3752 0x3131, // 3131..318F; HANGUL
3753 0x3190, // 3190..319F; COMMON
3754 0x31A0, // 31A0..31BF; BOPOMOFO
3755 0x31C0, // 31C0..31EF; COMMON
3756 0x31F0, // 31F0..31FF; KATAKANA
3757 0x3200, // 3200..321F; HANGUL
3758 0x3220, // 3220..325F; COMMON
3759 0x3260, // 3260..327E; HANGUL
3760 0x327F, // 327F..32CF; COMMON
3761 0x32D0, // 32D0..3357; KATAKANA
3762 0x3358, // 3358..33FF; COMMON
3763 0x3400, // 3400..4DBF; HAN
3764 0x4DC0, // 4DC0..4DFF; COMMON
3765 0x4E00, // 4E00..9FFF; HAN
3766 0xA000, // A000..A4CF; YI
3767 0xA4D0, // A4D0..A4FF; LISU
3768 0xA500, // A500..A63F; VAI
3769 0xA640, // A640..A69F; CYRILLIC
3770 0xA6A0, // A6A0..A6FF; BAMUM
3771 0xA700, // A700..A721; COMMON
3772 0xA722, // A722..A787; LATIN
3773 0xA788, // A788..A78A; COMMON
3774 0xA78B, // A78B..A7FF; LATIN
3775 0xA800, // A800..A82F; SYLOTI_NAGRI
3776 0xA830, // A830..A83F; COMMON
3777 0xA840, // A840..A87F; PHAGS_PA
3778 0xA880, // A880..A8DF; SAURASHTRA
3779 0xA8E0, // A8E0..A8FF; DEVANAGARI
3780 0xA900, // A900..A92F; KAYAH_LI
3781 0xA930, // A930..A95F; REJANG
3782 0xA960, // A960..A97F; HANGUL
3783 0xA980, // A980..A9FF; JAVANESE
3784 0xAA00, // AA00..AA5F; CHAM
3785 0xAA60, // AA60..AA7F; MYANMAR
3786 0xAA80, // AA80..AB00; TAI_VIET
3787 0xAB01, // AB01..ABBF; ETHIOPIC
3788 0xABC0, // ABC0..ABFF; MEETEI_MAYEK
3789 0xAC00, // AC00..D7FB; HANGUL
3790 0xD7FC, // D7FC..F8FF; UNKNOWN
3791 0xF900, // F900..FAFF; HAN
3792 0xFB00, // FB00..FB12; LATIN
3793 0xFB13, // FB13..FB1C; ARMENIAN
3794 0xFB1D, // FB1D..FB4F; HEBREW
3795 0xFB50, // FB50..FD3D; ARABIC
3796 0xFD3E, // FD3E..FD4F; COMMON
3797 0xFD50, // FD50..FDFC; ARABIC
3798 0xFDFD, // FDFD..FDFF; COMMON
3799 0xFE00, // FE00..FE0F; INHERITED
3800 0xFE10, // FE10..FE1F; COMMON
3801 0xFE20, // FE20..FE2F; INHERITED
3802 0xFE30, // FE30..FE6F; COMMON
3803 0xFE70, // FE70..FEFE; ARABIC
3804 0xFEFF, // FEFF..FF20; COMMON
3805 0xFF21, // FF21..FF3A; LATIN
3806 0xFF3B, // FF3B..FF40; COMMON
3807 0xFF41, // FF41..FF5A; LATIN
3808 0xFF5B, // FF5B..FF65; COMMON
3809 0xFF66, // FF66..FF6F; KATAKANA
3810 0xFF70, // FF70..FF70; COMMON
3811 0xFF71, // FF71..FF9D; KATAKANA
3812 0xFF9E, // FF9E..FF9F; COMMON
3813 0xFFA0, // FFA0..FFDF; HANGUL
3814 0xFFE0, // FFE0..FFFF; COMMON
3815 0x10000, // 10000..100FF; LINEAR_B
3816 0x10100, // 10100..1013F; COMMON
3817 0x10140, // 10140..1018F; GREEK
3818 0x10190, // 10190..101FC; COMMON
3819 0x101FD, // 101FD..1027F; INHERITED
3820 0x10280, // 10280..1029F; LYCIAN
3821 0x102A0, // 102A0..102FF; CARIAN
3822 0x10300, // 10300..1032F; OLD_ITALIC
3823 0x10330, // 10330..1037F; GOTHIC
3824 0x10380, // 10380..1039F; UGARITIC
3825 0x103A0, // 103A0..103FF; OLD_PERSIAN
3826 0x10400, // 10400..1044F; DESERET
3827 0x10450, // 10450..1047F; SHAVIAN
3828 0x10480, // 10480..107FF; OSMANYA
3829 0x10800, // 10800..1083F; CYPRIOT
3830 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC
3831 0x10900, // 10900..1091F; PHOENICIAN
3832 0x10920, // 10920..109FF; LYDIAN
3833 0x10A00, // 10A00..10A5F; KHAROSHTHI
3834 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN
3835 0x10B00, // 10B00..10B3F; AVESTAN
3836 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3837 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3838 0x10C00, // 10C00..10E5F; OLD_TURKIC
3839 0x10E60, // 10E60..10FFF; ARABIC
3840 0x11000, // 11000..1107F; BRAHMI
3841 0x11080, // 11080..11FFF; KAITHI
3842 0x12000, // 12000..12FFF; CUNEIFORM
3843 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3844 0x16800, // 16800..16A38; BAMUM
3845 0x1B000, // 1B000..1B000; KATAKANA
3846 0x1B001, // 1B001..1CFFF; HIRAGANA
3847 0x1D000, // 1D000..1D166; COMMON
3848 0x1D167, // 1D167..1D169; INHERITED
3849 0x1D16A, // 1D16A..1D17A; COMMON
3850 0x1D17B, // 1D17B..1D182; INHERITED
3851 0x1D183, // 1D183..1D184; COMMON
3852 0x1D185, // 1D185..1D18B; INHERITED
3853 0x1D18C, // 1D18C..1D1A9; COMMON
3854 0x1D1AA, // 1D1AA..1D1AD; INHERITED
3855 0x1D1AE, // 1D1AE..1D1FF; COMMON
3856 0x1D200, // 1D200..1D2FF; GREEK
3857 0x1D300, // 1D300..1F1FF; COMMON
3858 0x1F200, // 1F200..1F200; HIRAGANA
3859 0x1F201, // 1F201..1FFFF; COMMON
3860 0x20000, // 20000..E0000; HAN
3861 0xE0001, // E0001..E00FF; COMMON
3862 0xE0100, // E0100..E01EF; INHERITED
3863 0xE01F0 // E01F0..10FFFF; UNKNOWN
3864
3865 };
3866
3867 private static final UnicodeScript[] scripts = {
3868 COMMON,
3869 LATIN,
3870 COMMON,
3871 LATIN,
3872 COMMON,
3873 LATIN,
3874 COMMON,
3875 LATIN,
3876 COMMON,
3877 LATIN,
3878 COMMON,
3879 LATIN,
3880 COMMON,
3881 LATIN,
3882 COMMON,
3883 LATIN,
3884 COMMON,
3885 BOPOMOFO,
3886 COMMON,
3887 INHERITED,
3888 GREEK,
3889 COMMON,
3890 GREEK,
3891 COMMON,
3892 GREEK,
3893 COMMON,
3894 GREEK,
3895 COMMON,
3896 GREEK,
3897 COPTIC,
3898 GREEK,
3899 CYRILLIC,
3900 INHERITED,
3901 CYRILLIC,
3902 ARMENIAN,
3903 COMMON,
3904 ARMENIAN,
3905 HEBREW,
3906 ARABIC,
3907 COMMON,
3908 ARABIC,
3909 COMMON,
3910 ARABIC,
3911 COMMON,
3912 ARABIC,
3913 COMMON,
3914 ARABIC,
3915 INHERITED,
3916 ARABIC,
3917 INHERITED,
3918 COMMON,
3919 ARABIC,
3920 INHERITED,
3921 ARABIC,
3922 COMMON,
3923 ARABIC,
3924 SYRIAC,
3925 ARABIC,
3926 THAANA,
3927 NKO,
3928 SAMARITAN,
3929 MANDAIC,
3930 DEVANAGARI,
3931 INHERITED,
3932 DEVANAGARI,
3933 COMMON,
3934 DEVANAGARI,
3935 COMMON,
3936 DEVANAGARI,
3937 BENGALI,
3938 GURMUKHI,
3939 GUJARATI,
3940 ORIYA,
3941 TAMIL,
3942 TELUGU,
3943 KANNADA,
3944 MALAYALAM,
3945 SINHALA,
3946 THAI,
3947 COMMON,
3948 THAI,
3949 LAO,
3950 TIBETAN,
3951 COMMON,
3952 TIBETAN,
3953 MYANMAR,
3954 GEORGIAN,
3955 COMMON,
3956 GEORGIAN,
3957 HANGUL,
3958 ETHIOPIC,
3959 CHEROKEE,
3960 CANADIAN_ABORIGINAL,
3961 OGHAM,
3962 RUNIC,
3963 COMMON,
3964 RUNIC,
3965 TAGALOG,
3966 HANUNOO,
3967 COMMON,
3968 BUHID,
3969 TAGBANWA,
3970 KHMER,
3971 MONGOLIAN,
3972 COMMON,
3973 MONGOLIAN,
3974 COMMON,
3975 MONGOLIAN,
3976 CANADIAN_ABORIGINAL,
3977 LIMBU,
3978 TAI_LE,
3979 NEW_TAI_LUE,
3980 KHMER,
3981 BUGINESE,
3982 TAI_THAM,
3983 BALINESE,
3984 SUNDANESE,
3985 BATAK,
3986 LEPCHA,
3987 OL_CHIKI,
3988 INHERITED,
3989 COMMON,
3990 INHERITED,
3991 COMMON,
3992 INHERITED,
3993 COMMON,
3994 INHERITED,
3995 COMMON,
3996 LATIN,
3997 GREEK,
3998 CYRILLIC,
3999 LATIN,
4000 GREEK,
4001 LATIN,
4002 GREEK,
4003 LATIN,
4004 CYRILLIC,
4005 LATIN,
4006 GREEK,
4007 INHERITED,
4008 LATIN,
4009 GREEK,
4010 COMMON,
4011 INHERITED,
4012 COMMON,
4013 LATIN,
4014 COMMON,
4015 LATIN,
4016 COMMON,
4017 LATIN,
4018 COMMON,
4019 INHERITED,
4020 COMMON,
4021 GREEK,
4022 COMMON,
4023 LATIN,
4024 COMMON,
4025 LATIN,
4026 COMMON,
4027 LATIN,
4028 COMMON,
4029 LATIN,
4030 COMMON,
4031 BRAILLE,
4032 COMMON,
4033 GLAGOLITIC,
4034 LATIN,
4035 COPTIC,
4036 GEORGIAN,
4037 TIFINAGH,
4038 ETHIOPIC,
4039 CYRILLIC,
4040 COMMON,
4041 HAN,
4042 COMMON,
4043 HAN,
4044 COMMON,
4045 HAN,
4046 COMMON,
4047 HAN,
4048 INHERITED,
4049 HANGUL,
4050 COMMON,
4051 HAN,
4052 COMMON,
4053 HIRAGANA,
4054 INHERITED,
4055 COMMON,
4056 HIRAGANA,
4057 COMMON,
4058 KATAKANA,
4059 COMMON,
4060 KATAKANA,
4061 BOPOMOFO,
4062 HANGUL,
4063 COMMON,
4064 BOPOMOFO,
4065 COMMON,
4066 KATAKANA,
4067 HANGUL,
4068 COMMON,
4069 HANGUL,
4070 COMMON,
4071 KATAKANA,
4072 COMMON,
4073 HAN,
4074 COMMON,
4075 HAN,
4076 YI,
4077 LISU,
4078 VAI,
4079 CYRILLIC,
4080 BAMUM,
4081 COMMON,
4082 LATIN,
4083 COMMON,
4084 LATIN,
4085 SYLOTI_NAGRI,
4086 COMMON,
4087 PHAGS_PA,
4088 SAURASHTRA,
4089 DEVANAGARI,
4090 KAYAH_LI,
4091 REJANG,
4092 HANGUL,
4093 JAVANESE,
4094 CHAM,
4095 MYANMAR,
4096 TAI_VIET,
4097 ETHIOPIC,
4098 MEETEI_MAYEK,
4099 HANGUL,
4100 UNKNOWN,
4101 HAN,
4102 LATIN,
4103 ARMENIAN,
4104 HEBREW,
4105 ARABIC,
4106 COMMON,
4107 ARABIC,
4108 COMMON,
4109 INHERITED,
4110 COMMON,
4111 INHERITED,
4112 COMMON,
4113 ARABIC,
4114 COMMON,
4115 LATIN,
4116 COMMON,
4117 LATIN,
4118 COMMON,
4119 KATAKANA,
4120 COMMON,
4121 KATAKANA,
4122 COMMON,
4123 HANGUL,
4124 COMMON,
4125 LINEAR_B,
4126 COMMON,
4127 GREEK,
4128 COMMON,
4129 INHERITED,
4130 LYCIAN,
4131 CARIAN,
4132 OLD_ITALIC,
4133 GOTHIC,
4134 UGARITIC,
4135 OLD_PERSIAN,
4136 DESERET,
4137 SHAVIAN,
4138 OSMANYA,
4139 CYPRIOT,
4140 IMPERIAL_ARAMAIC,
4141 PHOENICIAN,
4142 LYDIAN,
4143 KHAROSHTHI,
4144 OLD_SOUTH_ARABIAN,
4145 AVESTAN,
4146 INSCRIPTIONAL_PARTHIAN,
4147 INSCRIPTIONAL_PAHLAVI,
4148 OLD_TURKIC,
4149 ARABIC,
4150 BRAHMI,
4151 KAITHI,
4152 CUNEIFORM,
4153 EGYPTIAN_HIEROGLYPHS,
4154 BAMUM,
4155 KATAKANA,
4156 HIRAGANA,
4157 COMMON,
4158 INHERITED,
4159 COMMON,
4160 INHERITED,
4161 COMMON,
4162 INHERITED,
4163 COMMON,
4164 INHERITED,
4165 COMMON,
4166 GREEK,
4167 COMMON,
4168 HIRAGANA,
4169 COMMON,
4170 HAN,
4171 COMMON,
4172 INHERITED,
4173 UNKNOWN
4174 };
4175
4176 private static HashMap<String, Character.UnicodeScript> aliases;
4177 static {
4178 aliases = new HashMap<>(128);
4179 aliases.put("ARAB", ARABIC);
4180 aliases.put("ARMI", IMPERIAL_ARAMAIC);
4181 aliases.put("ARMN", ARMENIAN);
4182 aliases.put("AVST", AVESTAN);
4183 aliases.put("BALI", BALINESE);
4184 aliases.put("BAMU", BAMUM);
4185 aliases.put("BATK", BATAK);
4186 aliases.put("BENG", BENGALI);
4187 aliases.put("BOPO", BOPOMOFO);
4188 aliases.put("BRAI", BRAILLE);
4189 aliases.put("BRAH", BRAHMI);
4190 aliases.put("BUGI", BUGINESE);
4191 aliases.put("BUHD", BUHID);
4192 aliases.put("CANS", CANADIAN_ABORIGINAL);
4193 aliases.put("CARI", CARIAN);
4194 aliases.put("CHAM", CHAM);
4195 aliases.put("CHER", CHEROKEE);
4196 aliases.put("COPT", COPTIC);
4197 aliases.put("CPRT", CYPRIOT);
4198 aliases.put("CYRL", CYRILLIC);
4199 aliases.put("DEVA", DEVANAGARI);
4200 aliases.put("DSRT", DESERET);
4201 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4202 aliases.put("ETHI", ETHIOPIC);
4203 aliases.put("GEOR", GEORGIAN);
4204 aliases.put("GLAG", GLAGOLITIC);
4205 aliases.put("GOTH", GOTHIC);
4206 aliases.put("GREK", GREEK);
4207 aliases.put("GUJR", GUJARATI);
4208 aliases.put("GURU", GURMUKHI);
4209 aliases.put("HANG", HANGUL);
4210 aliases.put("HANI", HAN);
4211 aliases.put("HANO", HANUNOO);
4212 aliases.put("HEBR", HEBREW);
4213 aliases.put("HIRA", HIRAGANA);
4214 // it appears we don't have the KATAKANA_OR_HIRAGANA
4215 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4216 aliases.put("ITAL", OLD_ITALIC);
4217 aliases.put("JAVA", JAVANESE);
4218 aliases.put("KALI", KAYAH_LI);
4219 aliases.put("KANA", KATAKANA);
4220 aliases.put("KHAR", KHAROSHTHI);
4221 aliases.put("KHMR", KHMER);
4222 aliases.put("KNDA", KANNADA);
4223 aliases.put("KTHI", KAITHI);
4224 aliases.put("LANA", TAI_THAM);
4225 aliases.put("LAOO", LAO);
4226 aliases.put("LATN", LATIN);
4227 aliases.put("LEPC", LEPCHA);
4228 aliases.put("LIMB", LIMBU);
4229 aliases.put("LINB", LINEAR_B);
4230 aliases.put("LISU", LISU);
4231 aliases.put("LYCI", LYCIAN);
4232 aliases.put("LYDI", LYDIAN);
4233 aliases.put("MAND", MANDAIC);
4234 aliases.put("MLYM", MALAYALAM);
4235 aliases.put("MONG", MONGOLIAN);
4236 aliases.put("MTEI", MEETEI_MAYEK);
4237 aliases.put("MYMR", MYANMAR);
4238 aliases.put("NKOO", NKO);
4239 aliases.put("OGAM", OGHAM);
4240 aliases.put("OLCK", OL_CHIKI);
4241 aliases.put("ORKH", OLD_TURKIC);
4242 aliases.put("ORYA", ORIYA);
4243 aliases.put("OSMA", OSMANYA);
4244 aliases.put("PHAG", PHAGS_PA);
4245 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4246 aliases.put("PHNX", PHOENICIAN);
4247 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4248 aliases.put("RJNG", REJANG);
4249 aliases.put("RUNR", RUNIC);
4250 aliases.put("SAMR", SAMARITAN);
4251 aliases.put("SARB", OLD_SOUTH_ARABIAN);
4252 aliases.put("SAUR", SAURASHTRA);
4253 aliases.put("SHAW", SHAVIAN);
4254 aliases.put("SINH", SINHALA);
4255 aliases.put("SUND", SUNDANESE);
4256 aliases.put("SYLO", SYLOTI_NAGRI);
4257 aliases.put("SYRC", SYRIAC);
4258 aliases.put("TAGB", TAGBANWA);
4259 aliases.put("TALE", TAI_LE);
4260 aliases.put("TALU", NEW_TAI_LUE);
4261 aliases.put("TAML", TAMIL);
4262 aliases.put("TAVT", TAI_VIET);
4263 aliases.put("TELU", TELUGU);
4264 aliases.put("TFNG", TIFINAGH);
4265 aliases.put("TGLG", TAGALOG);
4266 aliases.put("THAA", THAANA);
4267 aliases.put("THAI", THAI);
4268 aliases.put("TIBT", TIBETAN);
4269 aliases.put("UGAR", UGARITIC);
4270 aliases.put("VAII", VAI);
4271 aliases.put("XPEO", OLD_PERSIAN);
4272 aliases.put("XSUX", CUNEIFORM);
4273 aliases.put("YIII", YI);
4274 aliases.put("ZINH", INHERITED);
4275 aliases.put("ZYYY", COMMON);
4276 aliases.put("ZZZZ", UNKNOWN);
4277 }
4278
4279 /**
4280 * Returns the enum constant representing the Unicode script to which
4281 * the given character (Unicode code point) is assigned.
4282 *
4283 * @param codePoint the character (Unicode code point) in question.
4284 * @return The {@code UnicodeScript} constant representing the
4285 * Unicode script to which this character is assigned.
4286 *
4287 * @exception IllegalArgumentException if the specified
4288 * {@code codePoint} is an invalid Unicode code point.
4289 * @see Character#isValidCodePoint(int)
4290 *
4291 */
4292 public static UnicodeScript of(int codePoint) {
4293 if (!isValidCodePoint(codePoint))
4294 throw new IllegalArgumentException();
4295 int type = getType(codePoint);
4296 // leave SURROGATE and PRIVATE_USE for table lookup
4297 if (type == UNASSIGNED)
4298 return UNKNOWN;
4299 int index = Arrays.binarySearch(scriptStarts, codePoint);
4300 if (index < 0)
4301 index = -index - 2;
4302 return scripts[index];
4303 }
4304
4305 /**
4306 * Returns the UnicodeScript constant with the given Unicode script
4307 * name or the script name alias. Script names and their aliases are
4308 * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
4309 * and {@code PropertyValueAliases<version>.txt} define script names
4310 * and the script name aliases for a particular version of the
4311 * standard. The {@link Character} class specifies the version of
4312 * the standard that it supports.
4313 * <p>
4314 * Character case is ignored for all of the valid script names.
4315 * The en_US locale's case mapping rules are used to provide
4316 * case-insensitive string comparisons for script name validation.
4317 * <p>
4318 *
4319 * @param scriptName A {@code UnicodeScript} name.
4320 * @return The {@code UnicodeScript} constant identified
4321 * by {@code scriptName}
4322 * @throws IllegalArgumentException if {@code scriptName} is an
4323 * invalid name
4324 * @throws NullPointerException if {@code scriptName} is null
4325 */
4326 public static final UnicodeScript forName(String scriptName) {
4327 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4328 //.replace(' ', '_'));
4329 UnicodeScript sc = aliases.get(scriptName);
4330 if (sc != null)
4331 return sc;
4332 return valueOf(scriptName);
4333 }
4334 }
4335
4336 /**
4337 * The value of the {@code Character}.
4338 *
4339 * @serial
4340 */
4341 private final char value;
4342
4343 /** use serialVersionUID from JDK 1.0.2 for interoperability */
4344 private static final long serialVersionUID = 3786198910865385080L;
4345
4346 /**
4347 * Constructs a newly allocated {@code Character} object that
4348 * represents the specified {@code char} value.
4349 *
4350 * @param value the value to be represented by the
4351 * {@code Character} object.
4352 */
public Character(char value) {
    super();
    // Store the wrapped primitive; the field is final and never changes.
    this.value = value;
}
4356
4357 private static class CharacterCache {
4358 private CharacterCache(){}
4359
4360 static final Character cache[] = new Character[127 + 1];
4361
4362 static {
4363 for (int i = 0; i < cache.length; i++)
4364 cache[i] = new Character((char)i);
4365 }
4366 }
4367
4368 /**
4369 * Returns a <tt>Character</tt> instance representing the specified
4370 * <tt>char</tt> value.
4371 * If a new <tt>Character</tt> instance is not required, this method
4372 * should generally be used in preference to the constructor
4373 * {@link #Character(char)}, as this method is likely to yield
4374 * significantly better space and time performance by caching
4375 * frequently requested values.
4376 *
4377 * This method will always cache values in the range {@code
4378 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4379 * cache other values outside of this range.
4380 *
4381 * @param c a char value.
4382 * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4383 * @since 1.5
4384 */
4385 public static Character valueOf(char c) {
4386 if (c <= 127) { // must cache
4387 return CharacterCache.cache[(int)c];
4388 }
4389 return new Character(c);
4390 }
4391
4392 /**
4393 * Returns the value of this {@code Character} object.
4394 * @return the primitive {@code char} value represented by
4395 * this object.
4396 */
4397 public char charValue() {
4398 return value;
4399 }
4400
4401 /**
4402 * Returns a hash code for this {@code Character}; equal to the result
4403 * of invoking {@code charValue()}.
4404 *
4405 * @return a hash code value for this {@code Character}
4406 */
4407 public int hashCode() {
4408 return (int)value;
4409 }
4410
4411 /**
4412 * Compares this object against the specified object.
4413 * The result is {@code true} if and only if the argument is not
4414 * {@code null} and is a {@code Character} object that
4415 * represents the same {@code char} value as this object.
4416 *
4417 * @param obj the object to compare with.
4418 * @return {@code true} if the objects are the same;
4419 * {@code false} otherwise.
4420 */
4421 public boolean equals(Object obj) {
4422 if (obj instanceof Character) {
4423 return value == ((Character)obj).charValue();
4424 }
4425 return false;
4426 }
4427
4428 /**
4429 * Returns a {@code String} object representing this
4430 * {@code Character}'s value. The result is a string of
4431 * length 1 whose sole component is the primitive
4432 * {@code char} value represented by this
4433 * {@code Character} object.
4434 *
4435 * @return a string representation of this object.
4436 */
4437 public String toString() {
4438 char buf[] = {value};
4439 return String.valueOf(buf);
4440 }
4441
4442 /**
4443 * Returns a {@code String} object representing the
4444 * specified {@code char}. The result is a string of length
4445 * 1 consisting solely of the specified {@code char}.
4446 *
4447 * @param c the {@code char} to be converted
4448 * @return the string representation of the specified {@code char}
4449 * @since 1.4
4450 */
4451 public static String toString(char c) {
4452 return String.valueOf(c);
4453 }
4454
4455 /**
4456 * Determines whether the specified code point is a valid
4457 * <a href="http://www.unicode.org/glossary/#code_point">
4458 * Unicode code point value</a>.
4459 *
4460 * @param codePoint the Unicode code point to be tested
4461 * @return {@code true} if the specified code point value is between
4462 * {@link #MIN_CODE_POINT} and
4463 * {@link #MAX_CODE_POINT} inclusive;
4464 * {@code false} otherwise.
4465 * @since 1.5
4466 */
4467 public static boolean isValidCodePoint(int codePoint) {
4468 // Optimized form of:
4469 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4470 int plane = codePoint >>> 16;
4471 return plane < ((MAX_CODE_POINT + 1) >>> 16);
4472 }
4473
4474 /**
4475 * Determines whether the specified character (Unicode code point)
4476 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4477 * Such code points can be represented using a single {@code char}.
4478 *
4479 * @param codePoint the character (Unicode code point) to be tested
4480 * @return {@code true} if the specified code point is between
4481 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4482 * {@code false} otherwise.
4483 * @since 1.7
4484 */
4485 public static boolean isBmpCodePoint(int codePoint) {
4486 return codePoint >>> 16 == 0;
4487 // Optimized form of:
4488 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4489 // We consistently use logical shift (>>>) to facilitate
4490 // additional runtime optimizations.
4491 }
4492
4493 /**
4494 * Determines whether the specified character (Unicode code point)
4495 * is in the <a href="#supplementary">supplementary character</a> range.
4496 *
4497 * @param codePoint the character (Unicode code point) to be tested
4498 * @return {@code true} if the specified code point is between
4499 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4500 * {@link #MAX_CODE_POINT} inclusive;
4501 * {@code false} otherwise.
4502 * @since 1.5
4503 */
4504 public static boolean isSupplementaryCodePoint(int codePoint) {
4505 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4506 && codePoint < MAX_CODE_POINT + 1;
4507 }
4508
4509 /**
4510 * Determines if the given {@code char} value is a
4511 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4512 * Unicode high-surrogate code unit</a>
4513 * (also known as <i>leading-surrogate code unit</i>).
4514 *
4515 * <p>Such values do not represent characters by themselves,
4516 * but are used in the representation of
4517 * <a href="#supplementary">supplementary characters</a>
4518 * in the UTF-16 encoding.
4519 *
4520 * @param ch the {@code char} value to be tested.
4521 * @return {@code true} if the {@code char} value is between
4522 * {@link #MIN_HIGH_SURROGATE} and
4523 * {@link #MAX_HIGH_SURROGATE} inclusive;
4524 * {@code false} otherwise.
4525 * @see Character#isLowSurrogate(char)
4526 * @see Character.UnicodeBlock#of(int)
4527 * @since 1.5
4528 */
4529 public static boolean isHighSurrogate(char ch) {
4530 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4531 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4532 }
4533
4534 /**
4535 * Determines if the given {@code char} value is a
4536 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4537 * Unicode low-surrogate code unit</a>
4538 * (also known as <i>trailing-surrogate code unit</i>).
4539 *
4540 * <p>Such values do not represent characters by themselves,
4541 * but are used in the representation of
4542 * <a href="#supplementary">supplementary characters</a>
4543 * in the UTF-16 encoding.
4544 *
4545 * @param ch the {@code char} value to be tested.
4546 * @return {@code true} if the {@code char} value is between
4547 * {@link #MIN_LOW_SURROGATE} and
4548 * {@link #MAX_LOW_SURROGATE} inclusive;
4549 * {@code false} otherwise.
4550 * @see Character#isHighSurrogate(char)
4551 * @since 1.5
4552 */
4553 public static boolean isLowSurrogate(char ch) {
4554 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4555 }
4556
4557 /**
4558 * Determines if the given {@code char} value is a Unicode
4559 * <i>surrogate code unit</i>.
4560 *
4561 * <p>Such values do not represent characters by themselves,
4562 * but are used in the representation of
4563 * <a href="#supplementary">supplementary characters</a>
4564 * in the UTF-16 encoding.
4565 *
4566 * <p>A char value is a surrogate code unit if and only if it is either
4567 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4568 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4569 *
4570 * @param ch the {@code char} value to be tested.
4571 * @return {@code true} if the {@code char} value is between
4572 * {@link #MIN_SURROGATE} and
4573 * {@link #MAX_SURROGATE} inclusive;
4574 * {@code false} otherwise.
4575 * @since 1.7
4576 */
4577 public static boolean isSurrogate(char ch) {
4578 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4579 }
4580
4581 /**
4582 * Determines whether the specified pair of {@code char}
4583 * values is a valid
4584 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4585 * Unicode surrogate pair</a>.
 *
 * <p>This method is equivalent to the expression:
 * <blockquote><pre>{@code
 * isHighSurrogate(high) && isLowSurrogate(low)
 * }</pre></blockquote>
4591 *
4592 * @param high the high-surrogate code value to be tested
4593 * @param low the low-surrogate code value to be tested
4594 * @return {@code true} if the specified high and
4595 * low-surrogate code values represent a valid surrogate pair;
4596 * {@code false} otherwise.
4597 * @since 1.5
4598 */
4599 public static boolean isSurrogatePair(char high, char low) {
4600 return isHighSurrogate(high) && isLowSurrogate(low);
4601 }
4602
4603 /**
4604 * Determines the number of {@code char} values needed to
4605 * represent the specified character (Unicode code point). If the
4606 * specified character is equal to or greater than 0x10000, then
4607 * the method returns 2. Otherwise, the method returns 1.
4608 *
4609 * <p>This method doesn't validate the specified character to be a
4610 * valid Unicode code point. The caller must validate the
4611 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4612 * if necessary.
4613 *
4614 * @param codePoint the character (Unicode code point) to be tested.
 * @return 2 if the specified value is equal to or greater than 0x10000; 1 otherwise.
4616 * @see Character#isSupplementaryCodePoint(int)
4617 * @since 1.5
4618 */
4619 public static int charCount(int codePoint) {
4620 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4621 }
4622
4623 /**
4624 * Converts the specified surrogate pair to its supplementary code
4625 * point value. This method does not validate the specified
4626 * surrogate pair. The caller must validate it using {@link
4627 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4628 *
4629 * @param high the high-surrogate code unit
4630 * @param low the low-surrogate code unit
4631 * @return the supplementary code point composed from the
4632 * specified surrogate pair.
4633 * @since 1.5
4634 */
4635 public static int toCodePoint(char high, char low) {
4636 // Optimized form of:
4637 // return ((high - MIN_HIGH_SURROGATE) << 10)
4638 // + (low - MIN_LOW_SURROGATE)
4639 // + MIN_SUPPLEMENTARY_CODE_POINT;
4640 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4641 - (MIN_HIGH_SURROGATE << 10)
4642 - MIN_LOW_SURROGATE);
4643 }
4644
4645 /**
4646 * Returns the code point at the given index of the
4647 * {@code CharSequence}. If the {@code char} value at
4648 * the given index in the {@code CharSequence} is in the
4649 * high-surrogate range, the following index is less than the
4650 * length of the {@code CharSequence}, and the
4651 * {@code char} value at the following index is in the
4652 * low-surrogate range, then the supplementary code point
4653 * corresponding to this surrogate pair is returned. Otherwise,
4654 * the {@code char} value at the given index is returned.
4655 *
4656 * @param seq a sequence of {@code char} values (Unicode code
4657 * units)
4658 * @param index the index to the {@code char} values (Unicode
4659 * code units) in {@code seq} to be converted
4660 * @return the Unicode code point at the given index
4661 * @exception NullPointerException if {@code seq} is null.
4662 * @exception IndexOutOfBoundsException if the value
4663 * {@code index} is negative or not less than
4664 * {@link CharSequence#length() seq.length()}.
4665 * @since 1.5
4666 */
4667 public static int codePointAt(CharSequence seq, int index) {
4668 char c1 = seq.charAt(index++);
4669 if (isHighSurrogate(c1)) {
4670 if (index < seq.length()) {
4671 char c2 = seq.charAt(index);
4672 if (isLowSurrogate(c2)) {
4673 return toCodePoint(c1, c2);
4674 }
4675 }
4676 }
4677 return c1;
4678 }
4679
4680 /**
4681 * Returns the code point at the given index of the
4682 * {@code char} array. If the {@code char} value at
4683 * the given index in the {@code char} array is in the
4684 * high-surrogate range, the following index is less than the
4685 * length of the {@code char} array, and the
4686 * {@code char} value at the following index is in the
4687 * low-surrogate range, then the supplementary code point
4688 * corresponding to this surrogate pair is returned. Otherwise,
4689 * the {@code char} value at the given index is returned.
4690 *
4691 * @param a the {@code char} array
4692 * @param index the index to the {@code char} values (Unicode
4693 * code units) in the {@code char} array to be converted
4694 * @return the Unicode code point at the given index
4695 * @exception NullPointerException if {@code a} is null.
4696 * @exception IndexOutOfBoundsException if the value
4697 * {@code index} is negative or not less than
4698 * the length of the {@code char} array.
4699 * @since 1.5
4700 */
4701 public static int codePointAt(char[] a, int index) {
4702 return codePointAtImpl(a, index, a.length);
4703 }
4704
4705 /**
4706 * Returns the code point at the given index of the
4707 * {@code char} array, where only array elements with
4708 * {@code index} less than {@code limit} can be used. If
4709 * the {@code char} value at the given index in the
4710 * {@code char} array is in the high-surrogate range, the
4711 * following index is less than the {@code limit}, and the
4712 * {@code char} value at the following index is in the
4713 * low-surrogate range, then the supplementary code point
4714 * corresponding to this surrogate pair is returned. Otherwise,
4715 * the {@code char} value at the given index is returned.
4716 *
4717 * @param a the {@code char} array
4718 * @param index the index to the {@code char} values (Unicode
4719 * code units) in the {@code char} array to be converted
4720 * @param limit the index after the last array element that
4721 * can be used in the {@code char} array
4722 * @return the Unicode code point at the given index
4723 * @exception NullPointerException if {@code a} is null.
4724 * @exception IndexOutOfBoundsException if the {@code index}
4725 * argument is negative or not less than the {@code limit}
4726 * argument, or if the {@code limit} argument is negative or
4727 * greater than the length of the {@code char} array.
4728 * @since 1.5
4729 */
4730 public static int codePointAt(char[] a, int index, int limit) {
4731 if (index >= limit || limit < 0 || limit > a.length) {
4732 throw new IndexOutOfBoundsException();
4733 }
4734 return codePointAtImpl(a, index, limit);
4735 }
4736
4737 // throws ArrayIndexOutofBoundsException if index out of bounds
4738 static int codePointAtImpl(char[] a, int index, int limit) {
4739 char c1 = a[index++];
4740 if (isHighSurrogate(c1)) {
4741 if (index < limit) {
4742 char c2 = a[index];
4743 if (isLowSurrogate(c2)) {
4744 return toCodePoint(c1, c2);
4745 }
4746 }
4747 }
4748 return c1;
4749 }
4750
4751 /**
4752 * Returns the code point preceding the given index of the
4753 * {@code CharSequence}. If the {@code char} value at
4754 * {@code (index - 1)} in the {@code CharSequence} is in
4755 * the low-surrogate range, {@code (index - 2)} is not
4756 * negative, and the {@code char} value at {@code (index - 2)}
4757 * in the {@code CharSequence} is in the
4758 * high-surrogate range, then the supplementary code point
4759 * corresponding to this surrogate pair is returned. Otherwise,
4760 * the {@code char} value at {@code (index - 1)} is
4761 * returned.
4762 *
4763 * @param seq the {@code CharSequence} instance
4764 * @param index the index following the code point that should be returned
4765 * @return the Unicode code point value before the given index.
4766 * @exception NullPointerException if {@code seq} is null.
4767 * @exception IndexOutOfBoundsException if the {@code index}
4768 * argument is less than 1 or greater than {@link
4769 * CharSequence#length() seq.length()}.
4770 * @since 1.5
4771 */
4772 public static int codePointBefore(CharSequence seq, int index) {
4773 char c2 = seq.charAt(--index);
4774 if (isLowSurrogate(c2)) {
4775 if (index > 0) {
4776 char c1 = seq.charAt(--index);
4777 if (isHighSurrogate(c1)) {
4778 return toCodePoint(c1, c2);
4779 }
4780 }
4781 }
4782 return c2;
4783 }
4784
4785 /**
4786 * Returns the code point preceding the given index of the
4787 * {@code char} array. If the {@code char} value at
4788 * {@code (index - 1)} in the {@code char} array is in
4789 * the low-surrogate range, {@code (index - 2)} is not
4790 * negative, and the {@code char} value at {@code (index - 2)}
4791 * in the {@code char} array is in the
4792 * high-surrogate range, then the supplementary code point
4793 * corresponding to this surrogate pair is returned. Otherwise,
4794 * the {@code char} value at {@code (index - 1)} is
4795 * returned.
4796 *
4797 * @param a the {@code char} array
4798 * @param index the index following the code point that should be returned
4799 * @return the Unicode code point value before the given index.
4800 * @exception NullPointerException if {@code a} is null.
4801 * @exception IndexOutOfBoundsException if the {@code index}
4802 * argument is less than 1 or greater than the length of the
4803 * {@code char} array
4804 * @since 1.5
4805 */
4806 public static int codePointBefore(char[] a, int index) {
4807 return codePointBeforeImpl(a, index, 0);
4808 }
4809
4810 /**
4811 * Returns the code point preceding the given index of the
4812 * {@code char} array, where only array elements with
4813 * {@code index} greater than or equal to {@code start}
4814 * can be used. If the {@code char} value at {@code (index - 1)}
4815 * in the {@code char} array is in the
4816 * low-surrogate range, {@code (index - 2)} is not less than
4817 * {@code start}, and the {@code char} value at
4818 * {@code (index - 2)} in the {@code char} array is in
4819 * the high-surrogate range, then the supplementary code point
4820 * corresponding to this surrogate pair is returned. Otherwise,
4821 * the {@code char} value at {@code (index - 1)} is
4822 * returned.
4823 *
4824 * @param a the {@code char} array
4825 * @param index the index following the code point that should be returned
4826 * @param start the index of the first array element in the
4827 * {@code char} array
4828 * @return the Unicode code point value before the given index.
4829 * @exception NullPointerException if {@code a} is null.
4830 * @exception IndexOutOfBoundsException if the {@code index}
4831 * argument is not greater than the {@code start} argument or
4832 * is greater than the length of the {@code char} array, or
4833 * if the {@code start} argument is negative or not less than
4834 * the length of the {@code char} array.
4835 * @since 1.5
4836 */
4837 public static int codePointBefore(char[] a, int index, int start) {
4838 if (index <= start || start < 0 || start >= a.length) {
4839 throw new IndexOutOfBoundsException();
4840 }
4841 return codePointBeforeImpl(a, index, start);
4842 }
4843
4844 // throws ArrayIndexOutofBoundsException if index-1 out of bounds
4845 static int codePointBeforeImpl(char[] a, int index, int start) {
4846 char c2 = a[--index];
4847 if (isLowSurrogate(c2)) {
4848 if (index > start) {
4849 char c1 = a[--index];
4850 if (isHighSurrogate(c1)) {
4851 return toCodePoint(c1, c2);
4852 }
4853 }
4854 }
4855 return c2;
4856 }
4857
4858 /**
4859 * Returns the leading surrogate (a
4860 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4861 * high surrogate code unit</a>) of the
4862 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4863 * surrogate pair</a>
4864 * representing the specified supplementary character (Unicode
4865 * code point) in the UTF-16 encoding. If the specified character
4866 * is not a
4867 * <a href="Character.html#supplementary">supplementary character</a>,
4868 * an unspecified {@code char} is returned.
4869 *
4870 * <p>If
4871 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4872 * is {@code true}, then
4873 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4874 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4875 * are also always {@code true}.
4876 *
4877 * @param codePoint a supplementary character (Unicode code point)
4878 * @return the leading surrogate code unit used to represent the
4879 * character in the UTF-16 encoding
4880 * @since 1.7
4881 */
4882 public static char highSurrogate(int codePoint) {
4883 return (char) ((codePoint >>> 10)
4884 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4885 }
4886
4887 /**
4888 * Returns the trailing surrogate (a
4889 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4890 * low surrogate code unit</a>) of the
4891 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4892 * surrogate pair</a>
4893 * representing the specified supplementary character (Unicode
4894 * code point) in the UTF-16 encoding. If the specified character
4895 * is not a
4896 * <a href="Character.html#supplementary">supplementary character</a>,
4897 * an unspecified {@code char} is returned.
4898 *
4899 * <p>If
4900 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4901 * is {@code true}, then
4902 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4903 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4904 * are also always {@code true}.
4905 *
4906 * @param codePoint a supplementary character (Unicode code point)
4907 * @return the trailing surrogate code unit used to represent the
4908 * character in the UTF-16 encoding
4909 * @since 1.7
4910 */
4911 public static char lowSurrogate(int codePoint) {
4912 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4913 }
4914
4915 /**
4916 * Converts the specified character (Unicode code point) to its
4917 * UTF-16 representation. If the specified code point is a BMP
4918 * (Basic Multilingual Plane or Plane 0) value, the same value is
4919 * stored in {@code dst[dstIndex]}, and 1 is returned. If the
4920 * specified code point is a supplementary character, its
4921 * surrogate values are stored in {@code dst[dstIndex]}
4922 * (high-surrogate) and {@code dst[dstIndex+1]}
4923 * (low-surrogate), and 2 is returned.
4924 *
4925 * @param codePoint the character (Unicode code point) to be converted.
4926 * @param dst an array of {@code char} in which the
4927 * {@code codePoint}'s UTF-16 value is stored.
4928 * @param dstIndex the start index into the {@code dst}
4929 * array where the converted value is stored.
4930 * @return 1 if the code point is a BMP code point, 2 if the
4931 * code point is a supplementary code point.
4932 * @exception IllegalArgumentException if the specified
4933 * {@code codePoint} is not a valid Unicode code point.
4934 * @exception NullPointerException if the specified {@code dst} is null.
4935 * @exception IndexOutOfBoundsException if {@code dstIndex}
4936 * is negative or not less than {@code dst.length}, or if
4937 * {@code dst} at {@code dstIndex} doesn't have enough
4938 * array element(s) to store the resulting {@code char}
4939 * value(s). (If {@code dstIndex} is equal to
4940 * {@code dst.length-1} and the specified
4941 * {@code codePoint} is a supplementary character, the
4942 * high-surrogate value is not stored in
4943 * {@code dst[dstIndex]}.)
4944 * @since 1.5
4945 */
4946 public static int toChars(int codePoint, char[] dst, int dstIndex) {
4947 if (isBmpCodePoint(codePoint)) {
4948 dst[dstIndex] = (char) codePoint;
4949 return 1;
4950 } else if (isValidCodePoint(codePoint)) {
4951 toSurrogates(codePoint, dst, dstIndex);
4952 return 2;
4953 } else {
4954 throw new IllegalArgumentException();
4955 }
4956 }
4957
4958 /**
4959 * Converts the specified character (Unicode code point) to its
4960 * UTF-16 representation stored in a {@code char} array. If
4961 * the specified code point is a BMP (Basic Multilingual Plane or
4962 * Plane 0) value, the resulting {@code char} array has
4963 * the same value as {@code codePoint}. If the specified code
4964 * point is a supplementary code point, the resulting
4965 * {@code char} array has the corresponding surrogate pair.
4966 *
4967 * @param codePoint a Unicode code point
4968 * @return a {@code char} array having
4969 * {@code codePoint}'s UTF-16 representation.
4970 * @exception IllegalArgumentException if the specified
4971 * {@code codePoint} is not a valid Unicode code point.
4972 * @since 1.5
4973 */
4974 public static char[] toChars(int codePoint) {
4975 if (isBmpCodePoint(codePoint)) {
4976 return new char[] { (char) codePoint };
4977 } else if (isValidCodePoint(codePoint)) {
4978 char[] result = new char[2];
4979 toSurrogates(codePoint, result, 0);
4980 return result;
4981 } else {
4982 throw new IllegalArgumentException();
4983 }
4984 }
4985
4986 static void toSurrogates(int codePoint, char[] dst, int index) {
4987 // We write elements "backwards" to guarantee all-or-nothing
4988 dst[index+1] = lowSurrogate(codePoint);
4989 dst[index] = highSurrogate(codePoint);
4990 }
4991
4992 /**
4993 * Returns the number of Unicode code points in the text range of
4994 * the specified char sequence. The text range begins at the
4995 * specified {@code beginIndex} and extends to the
4996 * {@code char} at index {@code endIndex - 1}. Thus the
4997 * length (in {@code char}s) of the text range is
4998 * {@code endIndex-beginIndex}. Unpaired surrogates within
4999 * the text range count as one code point each.
5000 *
5001 * @param seq the char sequence
5002 * @param beginIndex the index to the first {@code char} of
5003 * the text range.
5004 * @param endIndex the index after the last {@code char} of
5005 * the text range.
5006 * @return the number of Unicode code points in the specified text
5007 * range
5008 * @exception NullPointerException if {@code seq} is null.
5009 * @exception IndexOutOfBoundsException if the
5010 * {@code beginIndex} is negative, or {@code endIndex}
5011 * is larger than the length of the given sequence, or
5012 * {@code beginIndex} is larger than {@code endIndex}.
5013 * @since 1.5
5014 */
5015 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5016 int length = seq.length();
5017 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5018 throw new IndexOutOfBoundsException();
5019 }
5020 int n = endIndex - beginIndex;
5021 for (int i = beginIndex; i < endIndex; ) {
5022 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5023 isLowSurrogate(seq.charAt(i))) {
5024 n--;
5025 i++;
5026 }
5027 }
5028 return n;
5029 }
5030
5031 /**
5032 * Returns the number of Unicode code points in a subarray of the
5033 * {@code char} array argument. The {@code offset}
5034 * argument is the index of the first {@code char} of the
5035 * subarray and the {@code count} argument specifies the
5036 * length of the subarray in {@code char}s. Unpaired
5037 * surrogates within the subarray count as one code point each.
5038 *
5039 * @param a the {@code char} array
5040 * @param offset the index of the first {@code char} in the
5041 * given {@code char} array
5042 * @param count the length of the subarray in {@code char}s
5043 * @return the number of Unicode code points in the specified subarray
5044 * @exception NullPointerException if {@code a} is null.
5045 * @exception IndexOutOfBoundsException if {@code offset} or
5046 * {@code count} is negative, or if {@code offset +
5047 * count} is larger than the length of the given array.
5048 * @since 1.5
5049 */
5050 public static int codePointCount(char[] a, int offset, int count) {
5051 if (count > a.length - offset || offset < 0 || count < 0) {
5052 throw new IndexOutOfBoundsException();
5053 }
5054 return codePointCountImpl(a, offset, count);
5055 }
5056
5057 static int codePointCountImpl(char[] a, int offset, int count) {
5058 int endIndex = offset + count;
5059 int n = count;
5060 for (int i = offset; i < endIndex; ) {
5061 if (isHighSurrogate(a[i++]) && i < endIndex &&
5062 isLowSurrogate(a[i])) {
5063 n--;
5064 i++;
5065 }
5066 }
5067 return n;
5068 }
5069
5070 /**
5071 * Returns the index within the given char sequence that is offset
5072 * from the given {@code index} by {@code codePointOffset}
5073 * code points. Unpaired surrogates within the text range given by
5074 * {@code index} and {@code codePointOffset} count as
5075 * one code point each.
5076 *
5077 * @param seq the char sequence
5078 * @param index the index to be offset
5079 * @param codePointOffset the offset in code points
5080 * @return the index within the char sequence
5081 * @exception NullPointerException if {@code seq} is null.
5082 * @exception IndexOutOfBoundsException if {@code index}
 * is negative or larger than the length of the char sequence,
5084 * or if {@code codePointOffset} is positive and the
5085 * subsequence starting with {@code index} has fewer than
5086 * {@code codePointOffset} code points, or if
5087 * {@code codePointOffset} is negative and the subsequence
5088 * before {@code index} has fewer than the absolute value
5089 * of {@code codePointOffset} code points.
5090 * @since 1.5
5091 */
5092 public static int offsetByCodePoints(CharSequence seq, int index,
5093 int codePointOffset) {
5094 int length = seq.length();
5095 if (index < 0 || index > length) {
5096 throw new IndexOutOfBoundsException();
5097 }
5098
5099 int x = index;
5100 if (codePointOffset >= 0) {
5101 int i;
5102 for (i = 0; x < length && i < codePointOffset; i++) {
5103 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5104 isLowSurrogate(seq.charAt(x))) {
5105 x++;
5106 }
5107 }
5108 if (i < codePointOffset) {
5109 throw new IndexOutOfBoundsException();
5110 }
5111 } else {
5112 int i;
5113 for (i = codePointOffset; x > 0 && i < 0; i++) {
5114 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5115 isHighSurrogate(seq.charAt(x-1))) {
5116 x--;
5117 }
5118 }
5119 if (i < 0) {
5120 throw new IndexOutOfBoundsException();
5121 }
5122 }
5123 return x;
5124 }
5125
5126 /**
5127 * Returns the index within the given {@code char} subarray
5128 * that is offset from the given {@code index} by
5129 * {@code codePointOffset} code points. The
5130 * {@code start} and {@code count} arguments specify a
5131 * subarray of the {@code char} array. Unpaired surrogates
5132 * within the text range given by {@code index} and
5133 * {@code codePointOffset} count as one code point each.
5134 *
5135 * @param a the {@code char} array
5136 * @param start the index of the first {@code char} of the
5137 * subarray
5138 * @param count the length of the subarray in {@code char}s
5139 * @param index the index to be offset
5140 * @param codePointOffset the offset in code points
5141 * @return the index within the subarray
5142 * @exception NullPointerException if {@code a} is null.
5143 * @exception IndexOutOfBoundsException
5144 * if {@code start} or {@code count} is negative,
5145 * or if {@code start + count} is larger than the length of
5146 * the given array,
5147 * or if {@code index} is less than {@code start} or
 * larger than {@code start + count},
5149 * or if {@code codePointOffset} is positive and the text range
5150 * starting with {@code index} and ending with {@code start + count - 1}
5151 * has fewer than {@code codePointOffset} code
5152 * points,
5153 * or if {@code codePointOffset} is negative and the text range
5154 * starting with {@code start} and ending with {@code index - 1}
5155 * has fewer than the absolute value of
5156 * {@code codePointOffset} code points.
5157 * @since 1.5
5158 */
5159 public static int offsetByCodePoints(char[] a, int start, int count,
5160 int index, int codePointOffset) {
5161 if (count > a.length-start || start < 0 || count < 0
5162 || index < start || index > start+count) {
5163 throw new IndexOutOfBoundsException();
5164 }
5165 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5166 }
5167
5168 static int offsetByCodePointsImpl(char[]a, int start, int count,
5169 int index, int codePointOffset) {
5170 int x = index;
5171 if (codePointOffset >= 0) {
5172 int limit = start + count;
5173 int i;
5174 for (i = 0; x < limit && i < codePointOffset; i++) {
5175 if (isHighSurrogate(a[x++]) && x < limit &&
5176 isLowSurrogate(a[x])) {
5177 x++;
5178 }
5179 }
5180 if (i < codePointOffset) {
5181 throw new IndexOutOfBoundsException();
5182 }
5183 } else {
5184 int i;
5185 for (i = codePointOffset; x > start && i < 0; i++) {
5186 if (isLowSurrogate(a[--x]) && x > start &&
5187 isHighSurrogate(a[x-1])) {
5188 x--;
5189 }
5190 }
5191 if (i < 0) {
5192 throw new IndexOutOfBoundsException();
5193 }
5194 }
5195 return x;
5196 }
5197
5198 /**
5199 * Determines if the specified character is a lowercase character.
5200 * <p>
5201 * A character is lowercase if its general category type, provided
5202 * by {@code Character.getType(ch)}, is
5203 * {@code LOWERCASE_LETTER}, or it has contributory property
5204 * Other_Lowercase as defined by the Unicode Standard.
5205 * <p>
5206 * The following are examples of lowercase characters:
5207 * <p><blockquote><pre>
5208 * a b c d e f g h i j k l m n o p q r s t u v w x y z
5209 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5210 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5211 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5212 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5213 * </pre></blockquote>
5214 * <p> Many other Unicode characters are lowercase too.
5215 *
5216 * <p><b>Note:</b> This method cannot handle <a
5217 * href="#supplementary"> supplementary characters</a>. To support
5218 * all Unicode characters, including supplementary characters, use
5219 * the {@link #isLowerCase(int)} method.
5220 *
5221 * @param ch the character to be tested.
5222 * @return {@code true} if the character is lowercase;
5223 * {@code false} otherwise.
 * @see Character#isUpperCase(char)
5225 * @see Character#isTitleCase(char)
5226 * @see Character#toLowerCase(char)
5227 * @see Character#getType(char)
5228 */
5229 public static boolean isLowerCase(char ch) {
5230 return isLowerCase((int)ch);
5231 }
5232
5233 /**
5234 * Determines if the specified character (Unicode code point) is a
5235 * lowercase character.
5236 * <p>
5237 * A character is lowercase if its general category type, provided
5238 * by {@link Character#getType getType(codePoint)}, is
5239 * {@code LOWERCASE_LETTER}, or it has contributory property
5240 * Other_Lowercase as defined by the Unicode Standard.
5241 * <p>
5242 * The following are examples of lowercase characters:
5243 * <p><blockquote><pre>
5244 * a b c d e f g h i j k l m n o p q r s t u v w x y z
5245 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5246 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5247 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5248 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5249 * </pre></blockquote>
5250 * <p> Many other Unicode characters are lowercase too.
5251 *
5252 * @param codePoint the character (Unicode code point) to be tested.
5253 * @return {@code true} if the character is lowercase;
5254 * {@code false} otherwise.
 * @see Character#isUpperCase(int)
5256 * @see Character#isTitleCase(int)
5257 * @see Character#toLowerCase(int)
5258 * @see Character#getType(int)
5259 * @since 1.5
5260 */
5261 public static boolean isLowerCase(int codePoint) {
5262 return getType(codePoint) == Character.LOWERCASE_LETTER ||
5263 CharacterData.of(codePoint).isOtherLowercase(codePoint);
5264 }
5265
5266 /**
5267 * Determines if the specified character is an uppercase character.
5268 * <p>
5269 * A character is uppercase if its general category type, provided by
 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5272 * <p>
5273 * The following are examples of uppercase characters:
5274 * <p><blockquote><pre>
5275 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5276 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5277 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5278 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5279 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5280 * </pre></blockquote>
 * <p> Many other Unicode characters are uppercase too.
5282 *
5283 * <p><b>Note:</b> This method cannot handle <a
5284 * href="#supplementary"> supplementary characters</a>. To support
5285 * all Unicode characters, including supplementary characters, use
5286 * the {@link #isUpperCase(int)} method.
5287 *
5288 * @param ch the character to be tested.
5289 * @return {@code true} if the character is uppercase;
5290 * {@code false} otherwise.
5291 * @see Character#isLowerCase(char)
5292 * @see Character#isTitleCase(char)
5293 * @see Character#toUpperCase(char)
5294 * @see Character#getType(char)
5295 * @since 1.0
5296 */
5297 public static boolean isUpperCase(char ch) {
5298 return isUpperCase((int)ch);
5299 }
5300
5301 /**
5302 * Determines if the specified character (Unicode code point) is an uppercase character.
5303 * <p>
5304 * A character is uppercase if its general category type, provided by
5305 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5306 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5307 * <p>
5308 * The following are examples of uppercase characters:
5309 * <p><blockquote><pre>
5310 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5311 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5312 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5313 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5314 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5315 * </pre></blockquote>
5316 * <p> Many other Unicode characters are uppercase too.<p>
5317 *
5318 * @param codePoint the character (Unicode code point) to be tested.
5319 * @return {@code true} if the character is uppercase;
5320 * {@code false} otherwise.
5321 * @see Character#isLowerCase(int)
5322 * @see Character#isTitleCase(int)
5323 * @see Character#toUpperCase(int)
5324 * @see Character#getType(int)
5325 * @since 1.5
5326 */
5327 public static boolean isUpperCase(int codePoint) {
5328 return getType(codePoint) == Character.UPPERCASE_LETTER ||
5329 CharacterData.of(codePoint).isOtherUppercase(codePoint);
5330 }
5331
5332 /**
5333 * Determines if the specified character is a titlecase character.
5334 * <p>
5335 * A character is a titlecase character if its general
5336 * category type, provided by {@code Character.getType(ch)},
5337 * is {@code TITLECASE_LETTER}.
5338 * <p>
5339 * Some characters look like pairs of Latin letters. For example, there
5340 * is an uppercase letter that looks like "LJ" and has a corresponding
5341 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5342 * is the appropriate form to use when rendering a word in lowercase
5343 * with initial capitals, as for a book title.
5344 * <p>
5345 * These are some of the Unicode characters for which this method returns
5346 * {@code true}:
5347 * <ul>
5348 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5349 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5350 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5351 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5352 * </ul>
5353 * <p> Many other Unicode characters are titlecase too.<p>
5354 *
5355 * <p><b>Note:</b> This method cannot handle <a
5356 * href="#supplementary"> supplementary characters</a>. To support
5357 * all Unicode characters, including supplementary characters, use
5358 * the {@link #isTitleCase(int)} method.
5359 *
5360 * @param ch the character to be tested.
5361 * @return {@code true} if the character is titlecase;
5362 * {@code false} otherwise.
5363 * @see Character#isLowerCase(char)
5364 * @see Character#isUpperCase(char)
5365 * @see Character#toTitleCase(char)
5366 * @see Character#getType(char)
5367 * @since 1.0.2
5368 */
5369 public static boolean isTitleCase(char ch) {
5370 return isTitleCase((int)ch);
5371 }
5372
5373 /**
5374 * Determines if the specified character (Unicode code point) is a titlecase character.
5375 * <p>
5376 * A character is a titlecase character if its general
5377 * category type, provided by {@link Character#getType(int) getType(codePoint)},
5378 * is {@code TITLECASE_LETTER}.
5379 * <p>
5380 * Some characters look like pairs of Latin letters. For example, there
5381 * is an uppercase letter that looks like "LJ" and has a corresponding
5382 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5383 * is the appropriate form to use when rendering a word in lowercase
5384 * with initial capitals, as for a book title.
5385 * <p>
5386 * These are some of the Unicode characters for which this method returns
5387 * {@code true}:
5388 * <ul>
5389 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5390 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5391 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5392 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5393 * </ul>
5394 * <p> Many other Unicode characters are titlecase too.<p>
5395 *
5396 * @param codePoint the character (Unicode code point) to be tested.
5397 * @return {@code true} if the character is titlecase;
5398 * {@code false} otherwise.
5399 * @see Character#isLowerCase(int)
5400 * @see Character#isUpperCase(int)
5401 * @see Character#toTitleCase(int)
5402 * @see Character#getType(int)
5403 * @since 1.5
5404 */
5405 public static boolean isTitleCase(int codePoint) {
5406 return getType(codePoint) == Character.TITLECASE_LETTER;
5407 }
5408
5409 /**
5410 * Determines if the specified character is a digit.
5411 * <p>
5412 * A character is a digit if its general category type, provided
5413 * by {@code Character.getType(ch)}, is
5414 * {@code DECIMAL_DIGIT_NUMBER}.
5415 * <p>
5416 * Some Unicode character ranges that contain digits:
5417 * <ul>
5418 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5419 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5420 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5421 * Arabic-Indic digits
5422 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5423 * Extended Arabic-Indic digits
5424 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5425 * Devanagari digits
5426 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5427 * Fullwidth digits
5428 * </ul>
5429 *
5430 * Many other character ranges contain digits as well.
5431 *
5432 * <p><b>Note:</b> This method cannot handle <a
5433 * href="#supplementary"> supplementary characters</a>. To support
5434 * all Unicode characters, including supplementary characters, use
5435 * the {@link #isDigit(int)} method.
5436 *
5437 * @param ch the character to be tested.
5438 * @return {@code true} if the character is a digit;
5439 * {@code false} otherwise.
5440 * @see Character#digit(char, int)
5441 * @see Character#forDigit(int, int)
5442 * @see Character#getType(char)
5443 */
5444 public static boolean isDigit(char ch) {
5445 return isDigit((int)ch);
5446 }
5447
5448 /**
5449 * Determines if the specified character (Unicode code point) is a digit.
5450 * <p>
5451 * A character is a digit if its general category type, provided
5452 * by {@link Character#getType(int) getType(codePoint)}, is
5453 * {@code DECIMAL_DIGIT_NUMBER}.
5454 * <p>
5455 * Some Unicode character ranges that contain digits:
5456 * <ul>
5457 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5458 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5459 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5460 * Arabic-Indic digits
5461 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5462 * Extended Arabic-Indic digits
5463 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5464 * Devanagari digits
5465 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5466 * Fullwidth digits
5467 * </ul>
5468 *
5469 * Many other character ranges contain digits as well.
5470 *
5471 * @param codePoint the character (Unicode code point) to be tested.
5472 * @return {@code true} if the character is a digit;
5473 * {@code false} otherwise.
5474 * @see Character#forDigit(int, int)
5475 * @see Character#getType(int)
5476 * @since 1.5
5477 */
5478 public static boolean isDigit(int codePoint) {
5479 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5480 }
5481
5482 /**
5483 * Determines if a character is defined in Unicode.
5484 * <p>
5485 * A character is defined if at least one of the following is true:
5486 * <ul>
5487 * <li>It has an entry in the UnicodeData file.
5488 * <li>It has a value in a range defined by the UnicodeData file.
5489 * </ul>
5490 *
5491 * <p><b>Note:</b> This method cannot handle <a
5492 * href="#supplementary"> supplementary characters</a>. To support
5493 * all Unicode characters, including supplementary characters, use
5494 * the {@link #isDefined(int)} method.
5495 *
5496 * @param ch the character to be tested
5497 * @return {@code true} if the character has a defined meaning
5498 * in Unicode; {@code false} otherwise.
5499 * @see Character#isDigit(char)
5500 * @see Character#isLetter(char)
5501 * @see Character#isLetterOrDigit(char)
5502 * @see Character#isLowerCase(char)
5503 * @see Character#isTitleCase(char)
5504 * @see Character#isUpperCase(char)
5505 * @since 1.0.2
5506 */
5507 public static boolean isDefined(char ch) {
5508 return isDefined((int)ch);
5509 }
5510
5511 /**
5512 * Determines if a character (Unicode code point) is defined in Unicode.
5513 * <p>
5514 * A character is defined if at least one of the following is true:
5515 * <ul>
5516 * <li>It has an entry in the UnicodeData file.
5517 * <li>It has a value in a range defined by the UnicodeData file.
5518 * </ul>
5519 *
5520 * @param codePoint the character (Unicode code point) to be tested.
5521 * @return {@code true} if the character has a defined meaning
5522 * in Unicode; {@code false} otherwise.
5523 * @see Character#isDigit(int)
5524 * @see Character#isLetter(int)
5525 * @see Character#isLetterOrDigit(int)
5526 * @see Character#isLowerCase(int)
5527 * @see Character#isTitleCase(int)
5528 * @see Character#isUpperCase(int)
5529 * @since 1.5
5530 */
5531 public static boolean isDefined(int codePoint) {
5532 return getType(codePoint) != Character.UNASSIGNED;
5533 }
5534
5535 /**
5536 * Determines if the specified character is a letter.
5537 * <p>
5538 * A character is considered to be a letter if its general
5539 * category type, provided by {@code Character.getType(ch)},
5540 * is any of the following:
5541 * <ul>
5542 * <li> {@code UPPERCASE_LETTER}
5543 * <li> {@code LOWERCASE_LETTER}
5544 * <li> {@code TITLECASE_LETTER}
5545 * <li> {@code MODIFIER_LETTER}
5546 * <li> {@code OTHER_LETTER}
5547 * </ul>
5548 *
5549 * Not all letters have case. Many characters are
5550 * letters but are neither uppercase nor lowercase nor titlecase.
5551 *
5552 * <p><b>Note:</b> This method cannot handle <a
5553 * href="#supplementary"> supplementary characters</a>. To support
5554 * all Unicode characters, including supplementary characters, use
5555 * the {@link #isLetter(int)} method.
5556 *
5557 * @param ch the character to be tested.
5558 * @return {@code true} if the character is a letter;
5559 * {@code false} otherwise.
5560 * @see Character#isDigit(char)
5561 * @see Character#isJavaIdentifierStart(char)
5562 * @see Character#isJavaLetter(char)
5563 * @see Character#isJavaLetterOrDigit(char)
5564 * @see Character#isLetterOrDigit(char)
5565 * @see Character#isLowerCase(char)
5566 * @see Character#isTitleCase(char)
5567 * @see Character#isUnicodeIdentifierStart(char)
5568 * @see Character#isUpperCase(char)
5569 */
5570 public static boolean isLetter(char ch) {
5571 return isLetter((int)ch);
5572 }
5573
5574 /**
5575 * Determines if the specified character (Unicode code point) is a letter.
5576 * <p>
5577 * A character is considered to be a letter if its general
5578 * category type, provided by {@link Character#getType(int) getType(codePoint)},
5579 * is any of the following:
5580 * <ul>
5581 * <li> {@code UPPERCASE_LETTER}
5582 * <li> {@code LOWERCASE_LETTER}
5583 * <li> {@code TITLECASE_LETTER}
5584 * <li> {@code MODIFIER_LETTER}
5585 * <li> {@code OTHER_LETTER}
5586 * </ul>
5587 *
5588 * Not all letters have case. Many characters are
5589 * letters but are neither uppercase nor lowercase nor titlecase.
5590 *
5591 * @param codePoint the character (Unicode code point) to be tested.
5592 * @return {@code true} if the character is a letter;
5593 * {@code false} otherwise.
5594 * @see Character#isDigit(int)
5595 * @see Character#isJavaIdentifierStart(int)
5596 * @see Character#isLetterOrDigit(int)
5597 * @see Character#isLowerCase(int)
5598 * @see Character#isTitleCase(int)
5599 * @see Character#isUnicodeIdentifierStart(int)
5600 * @see Character#isUpperCase(int)
5601 * @since 1.5
5602 */
5603 public static boolean isLetter(int codePoint) {
5604 return ((((1 << Character.UPPERCASE_LETTER) |
5605 (1 << Character.LOWERCASE_LETTER) |
5606 (1 << Character.TITLECASE_LETTER) |
5607 (1 << Character.MODIFIER_LETTER) |
5608 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5609 != 0;
5610 }
5611
5612 /**
5613 * Determines if the specified character is a letter or digit.
5614 * <p>
5615 * A character is considered to be a letter or digit if either
5616 * {@code Character.isLetter(char ch)} or
5617 * {@code Character.isDigit(char ch)} returns
5618 * {@code true} for the character.
5619 *
5620 * <p><b>Note:</b> This method cannot handle <a
5621 * href="#supplementary"> supplementary characters</a>. To support
5622 * all Unicode characters, including supplementary characters, use
5623 * the {@link #isLetterOrDigit(int)} method.
5624 *
5625 * @param ch the character to be tested.
5626 * @return {@code true} if the character is a letter or digit;
5627 * {@code false} otherwise.
5628 * @see Character#isDigit(char)
5629 * @see Character#isJavaIdentifierPart(char)
5630 * @see Character#isJavaLetter(char)
5631 * @see Character#isJavaLetterOrDigit(char)
5632 * @see Character#isLetter(char)
5633 * @see Character#isUnicodeIdentifierPart(char)
5634 * @since 1.0.2
5635 */
5636 public static boolean isLetterOrDigit(char ch) {
5637 return isLetterOrDigit((int)ch);
5638 }
5639
5640 /**
5641 * Determines if the specified character (Unicode code point) is a letter or digit.
5642 * <p>
5643 * A character is considered to be a letter or digit if either
5644 * {@link #isLetter(int) isLetter(codePoint)} or
5645 * {@link #isDigit(int) isDigit(codePoint)} returns
5646 * {@code true} for the character.
5647 *
5648 * @param codePoint the character (Unicode code point) to be tested.
5649 * @return {@code true} if the character is a letter or digit;
5650 * {@code false} otherwise.
5651 * @see Character#isDigit(int)
5652 * @see Character#isJavaIdentifierPart(int)
5653 * @see Character#isLetter(int)
5654 * @see Character#isUnicodeIdentifierPart(int)
5655 * @since 1.5
5656 */
5657 public static boolean isLetterOrDigit(int codePoint) {
5658 return ((((1 << Character.UPPERCASE_LETTER) |
5659 (1 << Character.LOWERCASE_LETTER) |
5660 (1 << Character.TITLECASE_LETTER) |
5661 (1 << Character.MODIFIER_LETTER) |
5662 (1 << Character.OTHER_LETTER) |
5663 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5664 != 0;
5665 }
5666
5667 /**
5668 * Determines if the specified character is permissible as the first
5669 * character in a Java identifier.
5670 * <p>
5671 * A character may start a Java identifier if and only if
5672 * one of the following is true:
5673 * <ul>
5674 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5675 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5676 * <li> {@code ch} is a currency symbol (such as {@code '$'})
5677 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5678 * </ul>
5679 *
5680 * @param ch the character to be tested.
5681 * @return {@code true} if the character may start a Java
5682 * identifier; {@code false} otherwise.
5683 * @see Character#isJavaLetterOrDigit(char)
5684 * @see Character#isJavaIdentifierStart(char)
5685 * @see Character#isJavaIdentifierPart(char)
5686 * @see Character#isLetter(char)
5687 * @see Character#isLetterOrDigit(char)
5688 * @see Character#isUnicodeIdentifierStart(char)
5689 * @since 1.02
5690 * @deprecated Replaced by isJavaIdentifierStart(char).
5691 */
5692 @Deprecated
5693 public static boolean isJavaLetter(char ch) {
5694 return isJavaIdentifierStart(ch);
5695 }
5696
5697 /**
5698 * Determines if the specified character may be part of a Java
5699 * identifier as other than the first character.
5700 * <p>
5701 * A character may be part of a Java identifier if and only if any
5702 * of the following are true:
5703 * <ul>
5704 * <li> it is a letter
5705 * <li> it is a currency symbol (such as {@code '$'})
5706 * <li> it is a connecting punctuation character (such as {@code '_'})
5707 * <li> it is a digit
5708 * <li> it is a numeric letter (such as a Roman numeral character)
5709 * <li> it is a combining mark
5710 * <li> it is a non-spacing mark
5711 * <li> {@code isIdentifierIgnorable} returns
5712 * {@code true} for the character.
5713 * </ul>
5714 *
5715 * @param ch the character to be tested.
5716 * @return {@code true} if the character may be part of a
5717 * Java identifier; {@code false} otherwise.
5718 * @see Character#isJavaLetter(char)
5719 * @see Character#isJavaIdentifierStart(char)
5720 * @see Character#isJavaIdentifierPart(char)
5721 * @see Character#isLetter(char)
5722 * @see Character#isLetterOrDigit(char)
5723 * @see Character#isUnicodeIdentifierPart(char)
5724 * @see Character#isIdentifierIgnorable(char)
5725 * @since 1.02
5726 * @deprecated Replaced by isJavaIdentifierPart(char).
5727 */
5728 @Deprecated
5729 public static boolean isJavaLetterOrDigit(char ch) {
5730 return isJavaIdentifierPart(ch);
5731 }
5732
5733 /**
5734 * Determines if the specified character (Unicode code point) is an alphabet.
5735 * <p>
5736 * A character is considered to be alphabetic if its general category type,
5737 * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5738 * the following:
5739 * <ul>
5740 * <li> <code>UPPERCASE_LETTER</code>
5741 * <li> <code>LOWERCASE_LETTER</code>
5742 * <li> <code>TITLECASE_LETTER</code>
5743 * <li> <code>MODIFIER_LETTER</code>
5744 * <li> <code>OTHER_LETTER</code>
5745 * <li> <code>LETTER_NUMBER</code>
5746 * </ul>
5747 * or it has contributory property Other_Alphabetic as defined by the
5748 * Unicode Standard.
5749 *
5750 * @param codePoint the character (Unicode code point) to be tested.
5751 * @return <code>true</code> if the character is a Unicode alphabet
5752 * character, <code>false</code> otherwise.
5753 * @since 1.7
5754 */
5755 public static boolean isAlphabetic(int codePoint) {
5756 return (((((1 << Character.UPPERCASE_LETTER) |
5757 (1 << Character.LOWERCASE_LETTER) |
5758 (1 << Character.TITLECASE_LETTER) |
5759 (1 << Character.MODIFIER_LETTER) |
5760 (1 << Character.OTHER_LETTER) |
5761 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5762 CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5763 }
5764
5765 /**
5766 * Determines if the specified character (Unicode code point) is a CJKV
5767 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5768 * the Unicode Standard.
5769 *
5770 * @param codePoint the character (Unicode code point) to be tested.
5771 * @return <code>true</code> if the character is a Unicode ideograph
5772 * character, <code>false</code> otherwise.
5773 * @since 1.7
5774 */
5775 public static boolean isIdeographic(int codePoint) {
5776 return CharacterData.of(codePoint).isIdeographic(codePoint);
5777 }
5778
5779 /**
5780 * Determines if the specified character is
5781 * permissible as the first character in a Java identifier.
5782 * <p>
5783 * A character may start a Java identifier if and only if
5784 * one of the following conditions is true:
5785 * <ul>
5786 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5787 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5788 * <li> {@code ch} is a currency symbol (such as {@code '$'})
5789 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5790 * </ul>
5791 *
5792 * <p><b>Note:</b> This method cannot handle <a
5793 * href="#supplementary"> supplementary characters</a>. To support
5794 * all Unicode characters, including supplementary characters, use
5795 * the {@link #isJavaIdentifierStart(int)} method.
5796 *
5797 * @param ch the character to be tested.
5798 * @return {@code true} if the character may start a Java identifier;
5799 * {@code false} otherwise.
5800 * @see Character#isJavaIdentifierPart(char)
5801 * @see Character#isLetter(char)
5802 * @see Character#isUnicodeIdentifierStart(char)
5803 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5804 * @since 1.1
5805 */
5806 public static boolean isJavaIdentifierStart(char ch) {
5807 return isJavaIdentifierStart((int)ch);
5808 }
5809
5810 /**
5811 * Determines if the character (Unicode code point) is
5812 * permissible as the first character in a Java identifier.
5813 * <p>
5814 * A character may start a Java identifier if and only if
5815 * one of the following conditions is true:
5816 * <ul>
5817 * <li> {@link #isLetter(int) isLetter(codePoint)}
5818 * returns {@code true}
5819 * <li> {@link #getType(int) getType(codePoint)}
5820 * returns {@code LETTER_NUMBER}
5821 * <li> the referenced character is a currency symbol (such as {@code '$'})
5822 * <li> the referenced character is a connecting punctuation character
5823 * (such as {@code '_'}).
5824 * </ul>
5825 *
5826 * @param codePoint the character (Unicode code point) to be tested.
5827 * @return {@code true} if the character may start a Java identifier;
5828 * {@code false} otherwise.
5829 * @see Character#isJavaIdentifierPart(int)
5830 * @see Character#isLetter(int)
5831 * @see Character#isUnicodeIdentifierStart(int)
5832 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5833 * @since 1.5
5834 */
5835 public static boolean isJavaIdentifierStart(int codePoint) {
5836 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
5837 }
5838
5839 /**
5840 * Determines if the specified character may be part of a Java
5841 * identifier as other than the first character.
5842 * <p>
5843 * A character may be part of a Java identifier if any of the following
5844 * are true:
5845 * <ul>
5846 * <li> it is a letter
5847 * <li> it is a currency symbol (such as {@code '$'})
5848 * <li> it is a connecting punctuation character (such as {@code '_'})
5849 * <li> it is a digit
5850 * <li> it is a numeric letter (such as a Roman numeral character)
5851 * <li> it is a combining mark
5852 * <li> it is a non-spacing mark
5853 * <li> {@code isIdentifierIgnorable} returns
5854 * {@code true} for the character
5855 * </ul>
5856 *
5857 * <p><b>Note:</b> This method cannot handle <a
5858 * href="#supplementary"> supplementary characters</a>. To support
5859 * all Unicode characters, including supplementary characters, use
5860 * the {@link #isJavaIdentifierPart(int)} method.
5861 *
5862 * @param ch the character to be tested.
5863 * @return {@code true} if the character may be part of a
5864 * Java identifier; {@code false} otherwise.
5865 * @see Character#isIdentifierIgnorable(char)
5866 * @see Character#isJavaIdentifierStart(char)
5867 * @see Character#isLetterOrDigit(char)
5868 * @see Character#isUnicodeIdentifierPart(char)
5869 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5870 * @since 1.1
5871 */
5872 public static boolean isJavaIdentifierPart(char ch) {
5873 return isJavaIdentifierPart((int)ch);
5874 }
5875
5876 /**
5877 * Determines if the character (Unicode code point) may be part of a Java
5878 * identifier as other than the first character.
5879 * <p>
5880 * A character may be part of a Java identifier if any of the following
5881 * are true:
5882 * <ul>
5883 * <li> it is a letter
5884 * <li> it is a currency symbol (such as {@code '$'})
5885 * <li> it is a connecting punctuation character (such as {@code '_'})
5886 * <li> it is a digit
5887 * <li> it is a numeric letter (such as a Roman numeral character)
5888 * <li> it is a combining mark
5889 * <li> it is a non-spacing mark
5890 * <li> {@link #isIdentifierIgnorable(int)
5891 * isIdentifierIgnorable(codePoint)} returns {@code true} for
5892 * the character
5893 * </ul>
5894 *
5895 * @param codePoint the character (Unicode code point) to be tested.
5896 * @return {@code true} if the character may be part of a
5897 * Java identifier; {@code false} otherwise.
5898 * @see Character#isIdentifierIgnorable(int)
5899 * @see Character#isJavaIdentifierStart(int)
5900 * @see Character#isLetterOrDigit(int)
5901 * @see Character#isUnicodeIdentifierPart(int)
5902 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5903 * @since 1.5
5904 */
5905 public static boolean isJavaIdentifierPart(int codePoint) {
5906 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
5907 }
5908
5909 /**
5910 * Determines if the specified character is permissible as the
5911 * first character in a Unicode identifier.
5912 * <p>
5913 * A character may start a Unicode identifier if and only if
5914 * one of the following conditions is true:
5915 * <ul>
5916 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5917 * <li> {@link #getType(char) getType(ch)} returns
5918 * {@code LETTER_NUMBER}.
5919 * </ul>
5920 *
5921 * <p><b>Note:</b> This method cannot handle <a
5922 * href="#supplementary"> supplementary characters</a>. To support
5923 * all Unicode characters, including supplementary characters, use
5924 * the {@link #isUnicodeIdentifierStart(int)} method.
5925 *
5926 * @param ch the character to be tested.
5927 * @return {@code true} if the character may start a Unicode
5928 * identifier; {@code false} otherwise.
5929 * @see Character#isJavaIdentifierStart(char)
5930 * @see Character#isLetter(char)
5931 * @see Character#isUnicodeIdentifierPart(char)
5932 * @since 1.1
5933 */
5934 public static boolean isUnicodeIdentifierStart(char ch) {
5935 return isUnicodeIdentifierStart((int)ch);
5936 }
5937
5938 /**
5939 * Determines if the specified character (Unicode code point) is permissible as the
5940 * first character in a Unicode identifier.
5941 * <p>
5942 * A character may start a Unicode identifier if and only if
5943 * one of the following conditions is true:
5944 * <ul>
5945 * <li> {@link #isLetter(int) isLetter(codePoint)}
5946 * returns {@code true}
5947 * <li> {@link #getType(int) getType(codePoint)}
5948 * returns {@code LETTER_NUMBER}.
5949 * </ul>
5950 * @param codePoint the character (Unicode code point) to be tested.
5951 * @return {@code true} if the character may start a Unicode
5952 * identifier; {@code false} otherwise.
5953 * @see Character#isJavaIdentifierStart(int)
5954 * @see Character#isLetter(int)
5955 * @see Character#isUnicodeIdentifierPart(int)
5956 * @since 1.5
5957 */
5958 public static boolean isUnicodeIdentifierStart(int codePoint) {
5959 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
5960 }
5961
5962 /**
5963 * Determines if the specified character may be part of a Unicode
5964 * identifier as other than the first character.
5965 * <p>
5966 * A character may be part of a Unicode identifier if and only if
5967 * one of the following statements is true:
5968 * <ul>
5969 * <li> it is a letter
5970 * <li> it is a connecting punctuation character (such as {@code '_'})
5971 * <li> it is a digit
5972 * <li> it is a numeric letter (such as a Roman numeral character)
5973 * <li> it is a combining mark
5974 * <li> it is a non-spacing mark
5975 * <li> {@code isIdentifierIgnorable} returns
5976 * {@code true} for this character.
5977 * </ul>
5978 *
5979 * <p><b>Note:</b> This method cannot handle <a
5980 * href="#supplementary"> supplementary characters</a>. To support
5981 * all Unicode characters, including supplementary characters, use
5982 * the {@link #isUnicodeIdentifierPart(int)} method.
5983 *
5984 * @param ch the character to be tested.
5985 * @return {@code true} if the character may be part of a
5986 * Unicode identifier; {@code false} otherwise.
5987 * @see Character#isIdentifierIgnorable(char)
5988 * @see Character#isJavaIdentifierPart(char)
5989 * @see Character#isLetterOrDigit(char)
5990 * @see Character#isUnicodeIdentifierStart(char)
5991 * @since 1.1
5992 */
5993 public static boolean isUnicodeIdentifierPart(char ch) {
5994 return isUnicodeIdentifierPart((int)ch);
5995 }
5996
5997 /**
5998 * Determines if the specified character (Unicode code point) may be part of a Unicode
5999 * identifier as other than the first character.
6000 * <p>
6001 * A character may be part of a Unicode identifier if and only if
6002 * one of the following statements is true:
6003 * <ul>
6004 * <li> it is a letter
6005 * <li> it is a connecting punctuation character (such as {@code '_'})
6006 * <li> it is a digit
6007 * <li> it is a numeric letter (such as a Roman numeral character)
6008 * <li> it is a combining mark
6009 * <li> it is a non-spacing mark
6010 * <li> {@code isIdentifierIgnorable} returns
6011 * {@code true} for this character.
6012 * </ul>
6013 * @param codePoint the character (Unicode code point) to be tested.
6014 * @return {@code true} if the character may be part of a
6015 * Unicode identifier; {@code false} otherwise.
6016 * @see Character#isIdentifierIgnorable(int)
6017 * @see Character#isJavaIdentifierPart(int)
6018 * @see Character#isLetterOrDigit(int)
6019 * @see Character#isUnicodeIdentifierStart(int)
6020 * @since 1.5
6021 */
6022 public static boolean isUnicodeIdentifierPart(int codePoint) {
6023 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6024 }
6025
6026 /**
6027 * Determines if the specified character should be regarded as
6028 * an ignorable character in a Java identifier or a Unicode identifier.
6029 * <p>
6030 * The following Unicode characters are ignorable in a Java identifier
6031 * or a Unicode identifier:
6032 * <ul>
6033 * <li>ISO control characters that are not whitespace
6034 * <ul>
6035 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6036 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6037 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6038 * </ul>
6039 *
6040 * <li>all characters that have the {@code FORMAT} general
6041 * category value
6042 * </ul>
6043 *
6044 * <p><b>Note:</b> This method cannot handle <a
6045 * href="#supplementary"> supplementary characters</a>. To support
6046 * all Unicode characters, including supplementary characters, use
6047 * the {@link #isIdentifierIgnorable(int)} method.
6048 *
6049 * @param ch the character to be tested.
6050 * @return {@code true} if the character is an ignorable control
6051 * character that may be part of a Java or Unicode identifier;
6052 * {@code false} otherwise.
6053 * @see Character#isJavaIdentifierPart(char)
6054 * @see Character#isUnicodeIdentifierPart(char)
6055 * @since 1.1
6056 */
6057 public static boolean isIdentifierIgnorable(char ch) {
6058 return isIdentifierIgnorable((int)ch);
6059 }
6060
6061 /**
6062 * Determines if the specified character (Unicode code point) should be regarded as
6063 * an ignorable character in a Java identifier or a Unicode identifier.
6064 * <p>
6065 * The following Unicode characters are ignorable in a Java identifier
6066 * or a Unicode identifier:
6067 * <ul>
6068 * <li>ISO control characters that are not whitespace
6069 * <ul>
6070 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6071 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6072 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6073 * </ul>
6074 *
6075 * <li>all characters that have the {@code FORMAT} general
6076 * category value
6077 * </ul>
6078 *
6079 * @param codePoint the character (Unicode code point) to be tested.
6080 * @return {@code true} if the character is an ignorable control
6081 * character that may be part of a Java or Unicode identifier;
6082 * {@code false} otherwise.
6083 * @see Character#isJavaIdentifierPart(int)
6084 * @see Character#isUnicodeIdentifierPart(int)
6085 * @since 1.5
6086 */
6087 public static boolean isIdentifierIgnorable(int codePoint) {
6088 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6089 }
6090
6091 /**
6092 * Converts the character argument to lowercase using case
6093 * mapping information from the UnicodeData file.
6094 * <p>
6095 * Note that
6096 * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6097 * does not always return {@code true} for some ranges of
6098 * characters, particularly those that are symbols or ideographs.
6099 *
6100 * <p>In general, {@link String#toLowerCase()} should be used to map
6101 * characters to lowercase. {@code String} case mapping methods
6102 * have several benefits over {@code Character} case mapping methods.
6103 * {@code String} case mapping methods can perform locale-sensitive
6104 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6105 * the {@code Character} case mapping methods cannot.
6106 *
6107 * <p><b>Note:</b> This method cannot handle <a
6108 * href="#supplementary"> supplementary characters</a>. To support
6109 * all Unicode characters, including supplementary characters, use
6110 * the {@link #toLowerCase(int)} method.
6111 *
6112 * @param ch the character to be converted.
6113 * @return the lowercase equivalent of the character, if any;
6114 * otherwise, the character itself.
6115 * @see Character#isLowerCase(char)
6116 * @see String#toLowerCase()
6117 */
6118 public static char toLowerCase(char ch) {
6119 return (char)toLowerCase((int)ch);
6120 }
6121
6122 /**
6123 * Converts the character (Unicode code point) argument to
6124 * lowercase using case mapping information from the UnicodeData
6125 * file.
6126 *
6127 * <p> Note that
6128 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6129 * does not always return {@code true} for some ranges of
6130 * characters, particularly those that are symbols or ideographs.
6131 *
6132 * <p>In general, {@link String#toLowerCase()} should be used to map
6133 * characters to lowercase. {@code String} case mapping methods
6134 * have several benefits over {@code Character} case mapping methods.
6135 * {@code String} case mapping methods can perform locale-sensitive
6136 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6137 * the {@code Character} case mapping methods cannot.
6138 *
6139 * @param codePoint the character (Unicode code point) to be converted.
6140 * @return the lowercase equivalent of the character (Unicode code
6141 * point), if any; otherwise, the character itself.
6142 * @see Character#isLowerCase(int)
6143 * @see String#toLowerCase()
6144 *
6145 * @since 1.5
6146 */
6147 public static int toLowerCase(int codePoint) {
6148 return CharacterData.of(codePoint).toLowerCase(codePoint);
6149 }
6150
6151 /**
6152 * Converts the character argument to uppercase using case mapping
6153 * information from the UnicodeData file.
6154 * <p>
6155 * Note that
6156 * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6157 * does not always return {@code true} for some ranges of
6158 * characters, particularly those that are symbols or ideographs.
6159 *
6160 * <p>In general, {@link String#toUpperCase()} should be used to map
6161 * characters to uppercase. {@code String} case mapping methods
6162 * have several benefits over {@code Character} case mapping methods.
6163 * {@code String} case mapping methods can perform locale-sensitive
6164 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6165 * the {@code Character} case mapping methods cannot.
6166 *
6167 * <p><b>Note:</b> This method cannot handle <a
6168 * href="#supplementary"> supplementary characters</a>. To support
6169 * all Unicode characters, including supplementary characters, use
6170 * the {@link #toUpperCase(int)} method.
6171 *
6172 * @param ch the character to be converted.
6173 * @return the uppercase equivalent of the character, if any;
6174 * otherwise, the character itself.
6175 * @see Character#isUpperCase(char)
6176 * @see String#toUpperCase()
6177 */
6178 public static char toUpperCase(char ch) {
6179 return (char)toUpperCase((int)ch);
6180 }
6181
6182 /**
6183 * Converts the character (Unicode code point) argument to
6184 * uppercase using case mapping information from the UnicodeData
6185 * file.
6186 *
6187 * <p>Note that
6188 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6189 * does not always return {@code true} for some ranges of
6190 * characters, particularly those that are symbols or ideographs.
6191 *
6192 * <p>In general, {@link String#toUpperCase()} should be used to map
6193 * characters to uppercase. {@code String} case mapping methods
6194 * have several benefits over {@code Character} case mapping methods.
6195 * {@code String} case mapping methods can perform locale-sensitive
6196 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6197 * the {@code Character} case mapping methods cannot.
6198 *
6199 * @param codePoint the character (Unicode code point) to be converted.
6200 * @return the uppercase equivalent of the character, if any;
6201 * otherwise, the character itself.
6202 * @see Character#isUpperCase(int)
6203 * @see String#toUpperCase()
6204 *
6205 * @since 1.5
6206 */
6207 public static int toUpperCase(int codePoint) {
6208 return CharacterData.of(codePoint).toUpperCase(codePoint);
6209 }
6210
6211 /**
6212 * Converts the character argument to titlecase using case mapping
6213 * information from the UnicodeData file. If a character has no
6214 * explicit titlecase mapping and is not itself a titlecase char
6215 * according to UnicodeData, then the uppercase mapping is
6216 * returned as an equivalent titlecase mapping. If the
6217 * {@code char} argument is already a titlecase
6218 * {@code char}, the same {@code char} value will be
6219 * returned.
6220 * <p>
6221 * Note that
6222 * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6223 * does not always return {@code true} for some ranges of
6224 * characters.
6225 *
6226 * <p><b>Note:</b> This method cannot handle <a
6227 * href="#supplementary"> supplementary characters</a>. To support
6228 * all Unicode characters, including supplementary characters, use
6229 * the {@link #toTitleCase(int)} method.
6230 *
6231 * @param ch the character to be converted.
6232 * @return the titlecase equivalent of the character, if any;
6233 * otherwise, the character itself.
6234 * @see Character#isTitleCase(char)
6235 * @see Character#toLowerCase(char)
6236 * @see Character#toUpperCase(char)
6237 * @since 1.0.2
6238 */
6239 public static char toTitleCase(char ch) {
6240 return (char)toTitleCase((int)ch);
6241 }
6242
6243 /**
6244 * Converts the character (Unicode code point) argument to titlecase using case mapping
6245 * information from the UnicodeData file. If a character has no
6246 * explicit titlecase mapping and is not itself a titlecase char
6247 * according to UnicodeData, then the uppercase mapping is
6248 * returned as an equivalent titlecase mapping. If the
6249 * character argument is already a titlecase
6250 * character, the same character value will be
6251 * returned.
6252 *
6253 * <p>Note that
6254 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6255 * does not always return {@code true} for some ranges of
6256 * characters.
6257 *
6258 * @param codePoint the character (Unicode code point) to be converted.
6259 * @return the titlecase equivalent of the character, if any;
6260 * otherwise, the character itself.
6261 * @see Character#isTitleCase(int)
6262 * @see Character#toLowerCase(int)
6263 * @see Character#toUpperCase(int)
6264 * @since 1.5
6265 */
6266 public static int toTitleCase(int codePoint) {
6267 return CharacterData.of(codePoint).toTitleCase(codePoint);
6268 }
6269
6270 /**
6271 * Returns the numeric value of the character {@code ch} in the
6272 * specified radix.
6273 * <p>
 * If the radix is not in the range {@code MIN_RADIX}&nbsp;&le;
 * {@code radix}&nbsp;&le; {@code MAX_RADIX} or if the
6276 * value of {@code ch} is not a valid digit in the specified
6277 * radix, {@code -1} is returned. A character is a valid digit
6278 * if at least one of the following is true:
6279 * <ul>
6280 * <li>The method {@code isDigit} is {@code true} of the character
6281 * and the Unicode decimal digit value of the character (or its
6282 * single-character decomposition) is less than the specified radix.
6283 * In this case the decimal digit value is returned.
6284 * <li>The character is one of the uppercase Latin letters
6285 * {@code 'A'} through {@code 'Z'} and its code is less than
6286 * {@code radix + 'A' - 10}.
6287 * In this case, {@code ch - 'A' + 10}
6288 * is returned.
6289 * <li>The character is one of the lowercase Latin letters
6290 * {@code 'a'} through {@code 'z'} and its code is less than
6291 * {@code radix + 'a' - 10}.
6292 * In this case, {@code ch - 'a' + 10}
6293 * is returned.
6294 * <li>The character is one of the fullwidth uppercase Latin letters A
6295 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6296 * and its code is less than
6297 * {@code radix + '\u005CuFF21' - 10}.
6298 * In this case, {@code ch - '\u005CuFF21' + 10}
6299 * is returned.
6300 * <li>The character is one of the fullwidth lowercase Latin letters a
6301 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6302 * and its code is less than
6303 * {@code radix + '\u005CuFF41' - 10}.
6304 * In this case, {@code ch - '\u005CuFF41' + 10}
6305 * is returned.
6306 * </ul>
6307 *
6308 * <p><b>Note:</b> This method cannot handle <a
6309 * href="#supplementary"> supplementary characters</a>. To support
6310 * all Unicode characters, including supplementary characters, use
6311 * the {@link #digit(int, int)} method.
6312 *
6313 * @param ch the character to be converted.
6314 * @param radix the radix.
6315 * @return the numeric value represented by the character in the
6316 * specified radix.
6317 * @see Character#forDigit(int, int)
6318 * @see Character#isDigit(char)
6319 */
6320 public static int digit(char ch, int radix) {
6321 return digit((int)ch, radix);
6322 }
6323
6324 /**
6325 * Returns the numeric value of the specified character (Unicode
6326 * code point) in the specified radix.
6327 *
 * <p>If the radix is not in the range {@code MIN_RADIX}&nbsp;&le;
 * {@code radix}&nbsp;&le; {@code MAX_RADIX} or if the
6330 * character is not a valid digit in the specified
6331 * radix, {@code -1} is returned. A character is a valid digit
6332 * if at least one of the following is true:
6333 * <ul>
6334 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6335 * and the Unicode decimal digit value of the character (or its
6336 * single-character decomposition) is less than the specified radix.
6337 * In this case the decimal digit value is returned.
6338 * <li>The character is one of the uppercase Latin letters
6339 * {@code 'A'} through {@code 'Z'} and its code is less than
6340 * {@code radix + 'A' - 10}.
6341 * In this case, {@code codePoint - 'A' + 10}
6342 * is returned.
6343 * <li>The character is one of the lowercase Latin letters
6344 * {@code 'a'} through {@code 'z'} and its code is less than
6345 * {@code radix + 'a' - 10}.
6346 * In this case, {@code codePoint - 'a' + 10}
6347 * is returned.
6348 * <li>The character is one of the fullwidth uppercase Latin letters A
6349 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6350 * and its code is less than
6351 * {@code radix + '\u005CuFF21' - 10}.
6352 * In this case,
6353 * {@code codePoint - '\u005CuFF21' + 10}
6354 * is returned.
6355 * <li>The character is one of the fullwidth lowercase Latin letters a
6356 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6357 * and its code is less than
 * {@code radix + '\u005CuFF41' - 10}.
6359 * In this case,
6360 * {@code codePoint - '\u005CuFF41' + 10}
6361 * is returned.
6362 * </ul>
6363 *
6364 * @param codePoint the character (Unicode code point) to be converted.
6365 * @param radix the radix.
6366 * @return the numeric value represented by the character in the
6367 * specified radix.
6368 * @see Character#forDigit(int, int)
6369 * @see Character#isDigit(int)
6370 * @since 1.5
6371 */
6372 public static int digit(int codePoint, int radix) {
6373 return CharacterData.of(codePoint).digit(codePoint, radix);
6374 }
6375
6376 /**
6377 * Returns the {@code int} value that the specified Unicode
6378 * character represents. For example, the character
 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
 * an {@code int} with a value of 50.
6381 * <p>
6382 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6383 * {@code '\u005Cu005A'}), lowercase
6384 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6385 * full width variant ({@code '\u005CuFF21'} through
6386 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6387 * {@code '\u005CuFF5A'}) forms have numeric values from 10
6388 * through 35. This is independent of the Unicode specification,
6389 * which does not assign numeric values to these {@code char}
6390 * values.
6391 * <p>
6392 * If the character does not have a numeric value, then -1 is returned.
6393 * If the character has a numeric value that cannot be represented as a
6394 * nonnegative integer (for example, a fractional value), then -2
6395 * is returned.
6396 *
6397 * <p><b>Note:</b> This method cannot handle <a
6398 * href="#supplementary"> supplementary characters</a>. To support
6399 * all Unicode characters, including supplementary characters, use
6400 * the {@link #getNumericValue(int)} method.
6401 *
6402 * @param ch the character to be converted.
6403 * @return the numeric value of the character, as a nonnegative {@code int}
6404 * value; -2 if the character has a numeric value that is not a
6405 * nonnegative integer; -1 if the character has no numeric value.
6406 * @see Character#forDigit(int, int)
6407 * @see Character#isDigit(char)
6408 * @since 1.1
6409 */
6410 public static int getNumericValue(char ch) {
6411 return getNumericValue((int)ch);
6412 }
6413
6414 /**
6415 * Returns the {@code int} value that the specified
6416 * character (Unicode code point) represents. For example, the character
6417 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6418 * an {@code int} with a value of 50.
6419 * <p>
6420 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6421 * {@code '\u005Cu005A'}), lowercase
6422 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6423 * full width variant ({@code '\u005CuFF21'} through
6424 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6425 * {@code '\u005CuFF5A'}) forms have numeric values from 10
6426 * through 35. This is independent of the Unicode specification,
6427 * which does not assign numeric values to these {@code char}
6428 * values.
6429 * <p>
6430 * If the character does not have a numeric value, then -1 is returned.
6431 * If the character has a numeric value that cannot be represented as a
6432 * nonnegative integer (for example, a fractional value), then -2
6433 * is returned.
6434 *
6435 * @param codePoint the character (Unicode code point) to be converted.
6436 * @return the numeric value of the character, as a nonnegative {@code int}
6437 * value; -2 if the character has a numeric value that is not a
6438 * nonnegative integer; -1 if the character has no numeric value.
6439 * @see Character#forDigit(int, int)
6440 * @see Character#isDigit(int)
6441 * @since 1.5
6442 */
6443 public static int getNumericValue(int codePoint) {
6444 return CharacterData.of(codePoint).getNumericValue(codePoint);
6445 }
6446
6447 /**
6448 * Determines if the specified character is ISO-LATIN-1 white space.
6449 * This method returns {@code true} for the following five
6450 * characters only:
6451 * <table>
6452 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
6453 * <td>{@code HORIZONTAL TABULATION}</td></tr>
6454 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
 * <td>{@code LINE FEED}</td></tr>
6456 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
6457 * <td>{@code FORM FEED}</td></tr>
6458 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
6459 * <td>{@code CARRIAGE RETURN}</td></tr>
6460 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
6461 * <td>{@code SPACE}</td></tr>
6462 * </table>
6463 *
6464 * @param ch the character to be tested.
6465 * @return {@code true} if the character is ISO-LATIN-1 white
6466 * space; {@code false} otherwise.
6467 * @see Character#isSpaceChar(char)
6468 * @see Character#isWhitespace(char)
 * @deprecated Replaced by {@link #isWhitespace(char)}.
6470 */
6471 @Deprecated
6472 public static boolean isSpace(char ch) {
6473 return (ch <= 0x0020) &&
6474 (((((1L << 0x0009) |
6475 (1L << 0x000A) |
6476 (1L << 0x000C) |
6477 (1L << 0x000D) |
6478 (1L << 0x0020)) >> ch) & 1L) != 0);
6479 }
6480
6481
6482 /**
6483 * Determines if the specified character is a Unicode space character.
6484 * A character is considered to be a space character if and only if
6485 * it is specified to be a space character by the Unicode Standard. This
6486 * method returns true if the character's general category type is any of
6487 * the following:
6488 * <ul>
6489 * <li> {@code SPACE_SEPARATOR}
6490 * <li> {@code LINE_SEPARATOR}
6491 * <li> {@code PARAGRAPH_SEPARATOR}
6492 * </ul>
6493 *
6494 * <p><b>Note:</b> This method cannot handle <a
6495 * href="#supplementary"> supplementary characters</a>. To support
6496 * all Unicode characters, including supplementary characters, use
6497 * the {@link #isSpaceChar(int)} method.
6498 *
6499 * @param ch the character to be tested.
6500 * @return {@code true} if the character is a space character;
6501 * {@code false} otherwise.
6502 * @see Character#isWhitespace(char)
6503 * @since 1.1
6504 */
6505 public static boolean isSpaceChar(char ch) {
6506 return isSpaceChar((int)ch);
6507 }
6508
6509 /**
6510 * Determines if the specified character (Unicode code point) is a
6511 * Unicode space character. A character is considered to be a
6512 * space character if and only if it is specified to be a space
6513 * character by the Unicode Standard. This method returns true if
6514 * the character's general category type is any of the following:
6515 *
6516 * <ul>
6517 * <li> {@link #SPACE_SEPARATOR}
6518 * <li> {@link #LINE_SEPARATOR}
6519 * <li> {@link #PARAGRAPH_SEPARATOR}
6520 * </ul>
6521 *
6522 * @param codePoint the character (Unicode code point) to be tested.
6523 * @return {@code true} if the character is a space character;
6524 * {@code false} otherwise.
6525 * @see Character#isWhitespace(int)
6526 * @since 1.5
6527 */
6528 public static boolean isSpaceChar(int codePoint) {
6529 return ((((1 << Character.SPACE_SEPARATOR) |
6530 (1 << Character.LINE_SEPARATOR) |
6531 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6532 != 0;
6533 }
6534
6535 /**
6536 * Determines if the specified character is white space according to Java.
6537 * A character is a Java whitespace character if and only if it satisfies
6538 * one of the following criteria:
6539 * <ul>
6540 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6541 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6542 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
6543 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6544 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6545 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6546 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6547 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6548 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6549 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6550 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6551 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6552 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6553 * </ul>
6554 *
6555 * <p><b>Note:</b> This method cannot handle <a
6556 * href="#supplementary"> supplementary characters</a>. To support
6557 * all Unicode characters, including supplementary characters, use
6558 * the {@link #isWhitespace(int)} method.
6559 *
6560 * @param ch the character to be tested.
6561 * @return {@code true} if the character is a Java whitespace
6562 * character; {@code false} otherwise.
6563 * @see Character#isSpaceChar(char)
6564 * @since 1.1
6565 */
6566 public static boolean isWhitespace(char ch) {
6567 return isWhitespace((int)ch);
6568 }
6569
6570 /**
6571 * Determines if the specified character (Unicode code point) is
6572 * white space according to Java. A character is a Java
6573 * whitespace character if and only if it satisfies one of the
6574 * following criteria:
6575 * <ul>
6576 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6577 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6578 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
6579 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6580 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6581 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6582 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6583 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6584 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6585 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6586 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6587 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6588 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6589 * </ul>
 *
6592 * @param codePoint the character (Unicode code point) to be tested.
6593 * @return {@code true} if the character is a Java whitespace
6594 * character; {@code false} otherwise.
6595 * @see Character#isSpaceChar(int)
6596 * @since 1.5
6597 */
6598 public static boolean isWhitespace(int codePoint) {
6599 return CharacterData.of(codePoint).isWhitespace(codePoint);
6600 }
6601
6602 /**
6603 * Determines if the specified character is an ISO control
6604 * character. A character is considered to be an ISO control
6605 * character if its code is in the range {@code '\u005Cu0000'}
6606 * through {@code '\u005Cu001F'} or in the range
6607 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6608 *
6609 * <p><b>Note:</b> This method cannot handle <a
6610 * href="#supplementary"> supplementary characters</a>. To support
6611 * all Unicode characters, including supplementary characters, use
6612 * the {@link #isISOControl(int)} method.
6613 *
6614 * @param ch the character to be tested.
6615 * @return {@code true} if the character is an ISO control character;
6616 * {@code false} otherwise.
6617 *
6618 * @see Character#isSpaceChar(char)
6619 * @see Character#isWhitespace(char)
6620 * @since 1.1
6621 */
6622 public static boolean isISOControl(char ch) {
6623 return isISOControl((int)ch);
6624 }
6625
6626 /**
6627 * Determines if the referenced character (Unicode code point) is an ISO control
6628 * character. A character is considered to be an ISO control
6629 * character if its code is in the range {@code '\u005Cu0000'}
6630 * through {@code '\u005Cu001F'} or in the range
6631 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6632 *
6633 * @param codePoint the character (Unicode code point) to be tested.
6634 * @return {@code true} if the character is an ISO control character;
6635 * {@code false} otherwise.
6636 * @see Character#isSpaceChar(int)
6637 * @see Character#isWhitespace(int)
6638 * @since 1.5
6639 */
6640 public static boolean isISOControl(int codePoint) {
6641 // Optimized form of:
6642 // (codePoint >= 0x00 && codePoint <= 0x1F) ||
6643 // (codePoint >= 0x7F && codePoint <= 0x9F);
6644 return codePoint <= 0x9F &&
6645 (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6646 }
6647
6648 /**
6649 * Returns a value indicating a character's general category.
6650 *
6651 * <p><b>Note:</b> This method cannot handle <a
6652 * href="#supplementary"> supplementary characters</a>. To support
6653 * all Unicode characters, including supplementary characters, use
6654 * the {@link #getType(int)} method.
6655 *
6656 * @param ch the character to be tested.
6657 * @return a value of type {@code int} representing the
6658 * character's general category.
6659 * @see Character#COMBINING_SPACING_MARK
6660 * @see Character#CONNECTOR_PUNCTUATION
6661 * @see Character#CONTROL
6662 * @see Character#CURRENCY_SYMBOL
6663 * @see Character#DASH_PUNCTUATION
6664 * @see Character#DECIMAL_DIGIT_NUMBER
6665 * @see Character#ENCLOSING_MARK
6666 * @see Character#END_PUNCTUATION
6667 * @see Character#FINAL_QUOTE_PUNCTUATION
6668 * @see Character#FORMAT
6669 * @see Character#INITIAL_QUOTE_PUNCTUATION
6670 * @see Character#LETTER_NUMBER
6671 * @see Character#LINE_SEPARATOR
6672 * @see Character#LOWERCASE_LETTER
6673 * @see Character#MATH_SYMBOL
6674 * @see Character#MODIFIER_LETTER
6675 * @see Character#MODIFIER_SYMBOL
6676 * @see Character#NON_SPACING_MARK
6677 * @see Character#OTHER_LETTER
6678 * @see Character#OTHER_NUMBER
6679 * @see Character#OTHER_PUNCTUATION
6680 * @see Character#OTHER_SYMBOL
6681 * @see Character#PARAGRAPH_SEPARATOR
6682 * @see Character#PRIVATE_USE
6683 * @see Character#SPACE_SEPARATOR
6684 * @see Character#START_PUNCTUATION
6685 * @see Character#SURROGATE
6686 * @see Character#TITLECASE_LETTER
6687 * @see Character#UNASSIGNED
6688 * @see Character#UPPERCASE_LETTER
6689 * @since 1.1
6690 */
6691 public static int getType(char ch) {
6692 return getType((int)ch);
6693 }
6694
6695 /**
6696 * Returns a value indicating a character's general category.
6697 *
6698 * @param codePoint the character (Unicode code point) to be tested.
6699 * @return a value of type {@code int} representing the
6700 * character's general category.
6701 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6702 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6703 * @see Character#CONTROL CONTROL
6704 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6705 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
6706 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6707 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
6708 * @see Character#END_PUNCTUATION END_PUNCTUATION
6709 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6710 * @see Character#FORMAT FORMAT
6711 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6712 * @see Character#LETTER_NUMBER LETTER_NUMBER
6713 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
6714 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
6715 * @see Character#MATH_SYMBOL MATH_SYMBOL
6716 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
6717 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6718 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
6719 * @see Character#OTHER_LETTER OTHER_LETTER
6720 * @see Character#OTHER_NUMBER OTHER_NUMBER
6721 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6722 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
6723 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6724 * @see Character#PRIVATE_USE PRIVATE_USE
6725 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
6726 * @see Character#START_PUNCTUATION START_PUNCTUATION
6727 * @see Character#SURROGATE SURROGATE
6728 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
6729 * @see Character#UNASSIGNED UNASSIGNED
6730 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
6731 * @since 1.5
6732 */
6733 public static int getType(int codePoint) {
6734 return CharacterData.of(codePoint).getType(codePoint);
6735 }
6736
6737 /**
6738 * Determines the character representation for a specific digit in
6739 * the specified radix. If the value of {@code radix} is not a
6740 * valid radix, or the value of {@code digit} is not a valid
6741 * digit in the specified radix, the null character
6742 * ({@code '\u005Cu0000'}) is returned.
6743 * <p>
6744 * The {@code radix} argument is valid if it is greater than or
6745 * equal to {@code MIN_RADIX} and less than or equal to
6746 * {@code MAX_RADIX}. The {@code digit} argument is valid if
6747 * {@code 0 <= digit < radix}.
6748 * <p>
6749 * If the digit is less than 10, then
6750 * {@code '0' + digit} is returned. Otherwise, the value
6751 * {@code 'a' + digit - 10} is returned.
6752 *
6753 * @param digit the number to convert to a character.
6754 * @param radix the radix.
6755 * @return the {@code char} representation of the specified digit
6756 * in the specified radix.
6757 * @see Character#MIN_RADIX
6758 * @see Character#MAX_RADIX
6759 * @see Character#digit(char, int)
6760 */
6761 public static char forDigit(int digit, int radix) {
6762 if ((digit >= radix) || (digit < 0)) {
6763 return '\0';
6764 }
6765 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6766 return '\0';
6767 }
6768 if (digit < 10) {
6769 return (char)('0' + digit);
6770 }
6771 return (char)('a' - 10 + digit);
6772 }
6773
6774 /**
6775 * Returns the Unicode directionality property for the given
6776 * character. Character directionality is used to calculate the
6777 * visual ordering of text. The directionality value of undefined
6778 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6779 *
6780 * <p><b>Note:</b> This method cannot handle <a
6781 * href="#supplementary"> supplementary characters</a>. To support
6782 * all Unicode characters, including supplementary characters, use
6783 * the {@link #getDirectionality(int)} method.
6784 *
6785 * @param ch {@code char} for which the directionality property
6786 * is requested.
6787 * @return the directionality property of the {@code char} value.
6788 *
6789 * @see Character#DIRECTIONALITY_UNDEFINED
6790 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6791 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6792 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6793 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6794 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6795 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6796 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6797 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6798 * @see Character#DIRECTIONALITY_NONSPACING_MARK
6799 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6800 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6801 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6802 * @see Character#DIRECTIONALITY_WHITESPACE
6803 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6804 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6805 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6806 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6807 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6808 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6809 * @since 1.4
6810 */
6811 public static byte getDirectionality(char ch) {
6812 return getDirectionality((int)ch);
6813 }
6814
6815 /**
6816 * Returns the Unicode directionality property for the given
6817 * character (Unicode code point). Character directionality is
6818 * used to calculate the visual ordering of text. The
 * directionality value of undefined characters is {@link
6820 * #DIRECTIONALITY_UNDEFINED}.
6821 *
6822 * @param codePoint the character (Unicode code point) for which
6823 * the directionality property is requested.
6824 * @return the directionality property of the character.
6825 *
6826 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
6827 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
6828 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
6829 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6830 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
6831 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6832 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6833 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
6834 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6835 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
6836 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
6837 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
6838 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
6839 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
6840 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
6841 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6842 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6843 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6844 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6845 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6846 * @since 1.5
6847 */
6848 public static byte getDirectionality(int codePoint) {
6849 return CharacterData.of(codePoint).getDirectionality(codePoint);
6850 }
6851
6852 /**
6853 * Determines whether the character is mirrored according to the
6854 * Unicode specification. Mirrored characters should have their
6855 * glyphs horizontally mirrored when displayed in text that is
6856 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
6857 * PARENTHESIS is semantically defined to be an <i>opening
6858 * parenthesis</i>. This will appear as a "(" in text that is
6859 * left-to-right but as a ")" in text that is right-to-left.
6860 *
6861 * <p><b>Note:</b> This method cannot handle <a
6862 * href="#supplementary"> supplementary characters</a>. To support
6863 * all Unicode characters, including supplementary characters, use
6864 * the {@link #isMirrored(int)} method.
6865 *
6866 * @param ch {@code char} for which the mirrored property is requested
6867 * @return {@code true} if the char is mirrored, {@code false}
6868 * if the {@code char} is not mirrored or is not defined.
6869 * @since 1.4
6870 */
6871 public static boolean isMirrored(char ch) {
6872 return isMirrored((int)ch);
6873 }
6874
6875 /**
6876 * Determines whether the specified character (Unicode code point)
6877 * is mirrored according to the Unicode specification. Mirrored
6878 * characters should have their glyphs horizontally mirrored when
6879 * displayed in text that is right-to-left. For example,
6880 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
6881 * defined to be an <i>opening parenthesis</i>. This will appear
6882 * as a "(" in text that is left-to-right but as a ")" in text
6883 * that is right-to-left.
6884 *
6885 * @param codePoint the character (Unicode code point) to be tested.
6886 * @return {@code true} if the character is mirrored, {@code false}
6887 * if the character is not mirrored or is not defined.
6888 * @since 1.5
6889 */
6890 public static boolean isMirrored(int codePoint) {
6891 return CharacterData.of(codePoint).isMirrored(codePoint);
6892 }
6893
6894 /**
6895 * Compares two {@code Character} objects numerically.
6896 *
6897 * @param anotherCharacter the {@code Character} to be compared.
6898
6899 * @return the value {@code 0} if the argument {@code Character}
6900 * is equal to this {@code Character}; a value less than
6901 * {@code 0} if this {@code Character} is numerically less
6902 * than the {@code Character} argument; and a value greater than
6903 * {@code 0} if this {@code Character} is numerically greater
6904 * than the {@code Character} argument (unsigned comparison).
6905 * Note that this is strictly a numerical comparison; it is not
6906 * locale-dependent.
6907 * @since 1.2
6908 */
6909 public int compareTo(Character anotherCharacter) {
6910 return compare(this.value, anotherCharacter.value);
6911 }
6912
6913 /**
6914 * Compares two {@code char} values numerically.
6915 * The value returned is identical to what would be returned by:
6916 * <pre>
6917 * Character.valueOf(x).compareTo(Character.valueOf(y))
6918 * </pre>
6919 *
6920 * @param x the first {@code char} to compare
6921 * @param y the second {@code char} to compare
6922 * @return the value {@code 0} if {@code x == y};
6923 * a value less than {@code 0} if {@code x < y}; and
6924 * a value greater than {@code 0} if {@code x > y}
6925 * @since 1.7
6926 */
6927 public static int compare(char x, char y) {
6928 return x - y;
6929 }
6930
6931 /**
6932 * Converts the character (Unicode code point) argument to uppercase using
6933 * information from the UnicodeData file.
6934 * <p>
6935 *
6936 * @param codePoint the character (Unicode code point) to be converted.
6937 * @return either the uppercase equivalent of the character, if
6938 * any, or an error flag ({@code Character.ERROR})
6939 * that indicates that a 1:M {@code char} mapping exists.
6940 * @see Character#isLowerCase(char)
6941 * @see Character#isUpperCase(char)
6942 * @see Character#toLowerCase(char)
6943 * @see Character#toTitleCase(char)
6944 * @since 1.4
6945 */
6946 static int toUpperCaseEx(int codePoint) {
6947 assert isValidCodePoint(codePoint);
6948 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
6949 }
6950
6951 /**
6952 * Converts the character (Unicode code point) argument to uppercase using case
6953 * mapping information from the SpecialCasing file in the Unicode
6954 * specification. If a character has no explicit uppercase
6955 * mapping, then the {@code char} itself is returned in the
6956 * {@code char[]}.
6957 *
6958 * @param codePoint the character (Unicode code point) to be converted.
6959 * @return a {@code char[]} with the uppercased character.
6960 * @since 1.4
6961 */
6962 static char[] toUpperCaseCharArray(int codePoint) {
6963 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
6964 assert isBmpCodePoint(codePoint);
6965 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
6966 }
6967
6968 /**
6969 * The number of bits used to represent a <tt>char</tt> value in unsigned
6970 * binary form, constant {@code 16}.
6971 *
6972 * @since 1.5
6973 */
6974 public static final int SIZE = 16;
6975
6976 /**
6977 * Returns the value obtained by reversing the order of the bytes in the
6978 * specified <tt>char</tt> value.
6979 *
6980 * @return the value obtained by reversing (or, equivalently, swapping)
6981 * the bytes in the specified <tt>char</tt> value.
6982 * @since 1.5
6983 */
6984 public static char reverseBytes(char ch) {
6985 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
6986 }
6987
6988 /**
6989 * Returns the Unicode name of the specified character
6990 * {@code codePoint}, or null if the code point is
6991 * {@link #UNASSIGNED unassigned}.
6992 * <p>
6993 * Note: if the specified character is not assigned a name by
6994 * the <i>UnicodeData</i> file (part of the Unicode Character
6995 * Database maintained by the Unicode Consortium), the returned
6996 * name is the same as the result of expression.
6997 *
6998 * <blockquote>{@code
6999 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7000 * + " "
7001 * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7002 *
7003 * }</blockquote>
7004 *
7005 * @param codePoint the character (Unicode code point)
7006 *
7007 * @return the Unicode name of the specified character, or null if
7008 * the code point is unassigned.
7009 *
7010 * @exception IllegalArgumentException if the specified
7011 * {@code codePoint} is not a valid Unicode
7012 * code point.
7013 *
7014 * @since 1.7
7015 */
7016 public static String getName(int codePoint) {
7017 if (!isValidCodePoint(codePoint)) {
7018 throw new IllegalArgumentException();
7019 }
7020 String name = CharacterName.get(codePoint);
7021 if (name != null)
7022 return name;
7023 if (getType(codePoint) == UNASSIGNED)
7024 return null;
7025 UnicodeBlock block = UnicodeBlock.of(codePoint);
7026 if (block != null)
7027 return block.toString().replace('_', ' ') + " "
7028 + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7029 // should never come here
7030 return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7031 }
7032 }