1 /*
2 * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26 package java.lang;
27 import java.util.Map;
28 import java.util.HashMap;
29 import java.util.Locale;
30
31 /**
32 * The <code>Character</code> class wraps a value of the primitive
33 * type <code>char</code> in an object. An object of type
34 * <code>Character</code> contains a single field whose type is
35 * <code>char</code>.
36 * <p>
37 * In addition, this class provides several methods for determining
38 * a character's category (lowercase letter, digit, etc.) and for converting
39 * characters from uppercase to lowercase and vice versa.
40 * <p>
41 * Character information is based on the Unicode Standard, version 4.0.
42 * <p>
43 * The methods and data of class <code>Character</code> are defined by
44 * the information in the <i>UnicodeData</i> file that is part of the
45 * Unicode Character Database maintained by the Unicode
46 * Consortium. This file specifies various properties including name
47 * and general category for every defined Unicode code point or
48 * character range.
49 * <p>
50 * The file and its description are available from the Unicode Consortium at:
51 * <ul>
52 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
53 * </ul>
54 *
55 * <h4><a name="unicode">Unicode Character Representations</a></h4>
56 *
57 * <p>The <code>char</code> data type (and therefore the value that a
 * <code>Character</code> object encapsulates) is based on the
59 * original Unicode specification, which defined characters as
60 * fixed-width 16-bit entities. The Unicode standard has since been
61 * changed to allow for characters whose representation requires more
62 * than 16 bits. The range of legal <em>code point</em>s is now
63 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
64 * (Refer to the <a
65 * href="http://www.unicode.org/reports/tr27/#notation"><i>
66 * definition</i></a> of the U+<i>n</i> notation in the Unicode
67 * standard.)
68 *
69 * <p>The set of characters from U+0000 to U+FFFF is sometimes
70 * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
71 * name="supplementary">Characters</a> whose code points are greater
72 * than U+FFFF are called <em>supplementary character</em>s. The Java
73 * 2 platform uses the UTF-16 representation in <code>char</code>
74 * arrays and in the <code>String</code> and <code>StringBuffer</code>
75 * classes. In this representation, supplementary characters are
76 * represented as a pair of <code>char</code> values, the first from
77 * the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
78 * second from the <em>low-surrogates</em> range
79 * (\uDC00-\uDFFF).
80 *
81 * <p>A <code>char</code> value, therefore, represents Basic
82 * Multilingual Plane (BMP) code points, including the surrogate
83 * code points, or code units of the UTF-16 encoding. An
84 * <code>int</code> value represents all Unicode code points,
85 * including supplementary code points. The lower (least significant)
86 * 21 bits of <code>int</code> are used to represent Unicode code
87 * points and the upper (most significant) 11 bits must be zero.
88 * Unless otherwise specified, the behavior with respect to
89 * supplementary characters and surrogate <code>char</code> values is
90 * as follows:
91 *
92 * <ul>
93 * <li>The methods that only accept a <code>char</code> value cannot support
94 * supplementary characters. They treat <code>char</code> values from the
95 * surrogate ranges as undefined characters. For example,
96 * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though
 * this specific value, if followed by any low-surrogate value in a string,
98 * would represent a letter.
99 *
100 * <li>The methods that accept an <code>int</code> value support all
101 * Unicode characters, including supplementary characters. For
102 * example, <code>Character.isLetter(0x2F81A)</code> returns
103 * <code>true</code> because the code point value represents a letter
104 * (a CJK ideograph).
105 * </ul>
106 *
107 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
108 * used for character values in the range between U+0000 and U+10FFFF,
109 * and <em>Unicode code unit</em> is used for 16-bit
110 * <code>char</code> values that are code units of the <em>UTF-16</em>
111 * encoding. For more information on Unicode terminology, refer to the
112 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
113 *
114 * @author Lee Boynton
115 * @author Guy Steele
116 * @author Akira Tanaka
117 * @since 1.0
118 */
119 public final
120 class Character extends Object implements java.io.Serializable, Comparable<Character> {
121 /**
122 * The minimum radix available for conversion to and from strings.
123 * The constant value of this field is the smallest value permitted
124 * for the radix argument in radix-conversion methods such as the
125 * <code>digit</code> method, the <code>forDigit</code>
126 * method, and the <code>toString</code> method of class
127 * <code>Integer</code>.
128 *
129 * @see java.lang.Character#digit(char, int)
130 * @see java.lang.Character#forDigit(int, int)
131 * @see java.lang.Integer#toString(int, int)
132 * @see java.lang.Integer#valueOf(java.lang.String)
133 */
134 public static final int MIN_RADIX = 2;
135
136 /**
137 * The maximum radix available for conversion to and from strings.
138 * The constant value of this field is the largest value permitted
139 * for the radix argument in radix-conversion methods such as the
140 * <code>digit</code> method, the <code>forDigit</code>
141 * method, and the <code>toString</code> method of class
142 * <code>Integer</code>.
143 *
144 * @see java.lang.Character#digit(char, int)
145 * @see java.lang.Character#forDigit(int, int)
146 * @see java.lang.Integer#toString(int, int)
147 * @see java.lang.Integer#valueOf(java.lang.String)
148 */
149 public static final int MAX_RADIX = 36;
150
151 /**
152 * The constant value of this field is the smallest value of type
153 * <code>char</code>, <code>'\u0000'</code>.
154 *
155 * @since 1.0.2
156 */
157 public static final char MIN_VALUE = '\u0000';
158
159 /**
160 * The constant value of this field is the largest value of type
161 * <code>char</code>, <code>'\uFFFF'</code>.
162 *
163 * @since 1.0.2
164 */
165 public static final char MAX_VALUE = '\uffff';
166
167 /**
168 * The <code>Class</code> instance representing the primitive type
169 * <code>char</code>.
170 *
171 * @since 1.1
172 */
173 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
174
175 /*
176 * Normative general types
177 */
178
179 /*
180 * General character types
181 */
182
183 /**
184 * General category "Cn" in the Unicode specification.
185 * @since 1.1
186 */
187 public static final byte
188 UNASSIGNED = 0;
189
190 /**
191 * General category "Lu" in the Unicode specification.
192 * @since 1.1
193 */
194 public static final byte
195 UPPERCASE_LETTER = 1;
196
197 /**
198 * General category "Ll" in the Unicode specification.
199 * @since 1.1
200 */
201 public static final byte
202 LOWERCASE_LETTER = 2;
203
204 /**
205 * General category "Lt" in the Unicode specification.
206 * @since 1.1
207 */
208 public static final byte
209 TITLECASE_LETTER = 3;
210
211 /**
212 * General category "Lm" in the Unicode specification.
213 * @since 1.1
214 */
215 public static final byte
216 MODIFIER_LETTER = 4;
217
218 /**
219 * General category "Lo" in the Unicode specification.
220 * @since 1.1
221 */
222 public static final byte
223 OTHER_LETTER = 5;
224
225 /**
226 * General category "Mn" in the Unicode specification.
227 * @since 1.1
228 */
229 public static final byte
230 NON_SPACING_MARK = 6;
231
232 /**
233 * General category "Me" in the Unicode specification.
234 * @since 1.1
235 */
236 public static final byte
237 ENCLOSING_MARK = 7;
238
239 /**
240 * General category "Mc" in the Unicode specification.
241 * @since 1.1
242 */
243 public static final byte
244 COMBINING_SPACING_MARK = 8;
245
246 /**
247 * General category "Nd" in the Unicode specification.
248 * @since 1.1
249 */
250 public static final byte
251 DECIMAL_DIGIT_NUMBER = 9;
252
253 /**
254 * General category "Nl" in the Unicode specification.
255 * @since 1.1
256 */
257 public static final byte
258 LETTER_NUMBER = 10;
259
260 /**
261 * General category "No" in the Unicode specification.
262 * @since 1.1
263 */
264 public static final byte
265 OTHER_NUMBER = 11;
266
267 /**
268 * General category "Zs" in the Unicode specification.
269 * @since 1.1
270 */
271 public static final byte
272 SPACE_SEPARATOR = 12;
273
274 /**
275 * General category "Zl" in the Unicode specification.
276 * @since 1.1
277 */
278 public static final byte
279 LINE_SEPARATOR = 13;
280
281 /**
282 * General category "Zp" in the Unicode specification.
283 * @since 1.1
284 */
285 public static final byte
286 PARAGRAPH_SEPARATOR = 14;
287
288 /**
289 * General category "Cc" in the Unicode specification.
290 * @since 1.1
291 */
292 public static final byte
293 CONTROL = 15;
294
295 /**
296 * General category "Cf" in the Unicode specification.
297 * @since 1.1
298 */
299 public static final byte
300 FORMAT = 16;
301
302 /**
303 * General category "Co" in the Unicode specification.
304 * @since 1.1
305 */
306 public static final byte
307 PRIVATE_USE = 18;
308
309 /**
310 * General category "Cs" in the Unicode specification.
311 * @since 1.1
312 */
313 public static final byte
314 SURROGATE = 19;
315
316 /**
317 * General category "Pd" in the Unicode specification.
318 * @since 1.1
319 */
320 public static final byte
321 DASH_PUNCTUATION = 20;
322
323 /**
324 * General category "Ps" in the Unicode specification.
325 * @since 1.1
326 */
327 public static final byte
328 START_PUNCTUATION = 21;
329
330 /**
331 * General category "Pe" in the Unicode specification.
332 * @since 1.1
333 */
334 public static final byte
335 END_PUNCTUATION = 22;
336
337 /**
338 * General category "Pc" in the Unicode specification.
339 * @since 1.1
340 */
341 public static final byte
342 CONNECTOR_PUNCTUATION = 23;
343
344 /**
345 * General category "Po" in the Unicode specification.
346 * @since 1.1
347 */
348 public static final byte
349 OTHER_PUNCTUATION = 24;
350
351 /**
352 * General category "Sm" in the Unicode specification.
353 * @since 1.1
354 */
355 public static final byte
356 MATH_SYMBOL = 25;
357
358 /**
359 * General category "Sc" in the Unicode specification.
360 * @since 1.1
361 */
362 public static final byte
363 CURRENCY_SYMBOL = 26;
364
365 /**
366 * General category "Sk" in the Unicode specification.
367 * @since 1.1
368 */
369 public static final byte
370 MODIFIER_SYMBOL = 27;
371
372 /**
373 * General category "So" in the Unicode specification.
374 * @since 1.1
375 */
376 public static final byte
377 OTHER_SYMBOL = 28;
378
379 /**
380 * General category "Pi" in the Unicode specification.
381 * @since 1.4
382 */
383 public static final byte
384 INITIAL_QUOTE_PUNCTUATION = 29;
385
386 /**
387 * General category "Pf" in the Unicode specification.
388 * @since 1.4
389 */
390 public static final byte
391 FINAL_QUOTE_PUNCTUATION = 30;
392
393 /**
394 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
395 */
396 static final int ERROR = 0xFFFFFFFF;
397
398
399 /**
400 * Undefined bidirectional character type. Undefined <code>char</code>
401 * values have undefined directionality in the Unicode specification.
402 * @since 1.4
403 */
404 public static final byte DIRECTIONALITY_UNDEFINED = -1;
405
406 /**
407 * Strong bidirectional character type "L" in the Unicode specification.
408 * @since 1.4
409 */
410 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
411
412 /**
413 * Strong bidirectional character type "R" in the Unicode specification.
414 * @since 1.4
415 */
416 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
417
418 /**
419 * Strong bidirectional character type "AL" in the Unicode specification.
420 * @since 1.4
421 */
422 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
423
424 /**
425 * Weak bidirectional character type "EN" in the Unicode specification.
426 * @since 1.4
427 */
428 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
429
430 /**
431 * Weak bidirectional character type "ES" in the Unicode specification.
432 * @since 1.4
433 */
434 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
435
436 /**
437 * Weak bidirectional character type "ET" in the Unicode specification.
438 * @since 1.4
439 */
440 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
441
442 /**
443 * Weak bidirectional character type "AN" in the Unicode specification.
444 * @since 1.4
445 */
446 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
447
448 /**
449 * Weak bidirectional character type "CS" in the Unicode specification.
450 * @since 1.4
451 */
452 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
453
454 /**
455 * Weak bidirectional character type "NSM" in the Unicode specification.
456 * @since 1.4
457 */
458 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
459
460 /**
461 * Weak bidirectional character type "BN" in the Unicode specification.
462 * @since 1.4
463 */
464 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
465
466 /**
467 * Neutral bidirectional character type "B" in the Unicode specification.
468 * @since 1.4
469 */
470 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
471
472 /**
473 * Neutral bidirectional character type "S" in the Unicode specification.
474 * @since 1.4
475 */
476 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
477
478 /**
479 * Neutral bidirectional character type "WS" in the Unicode specification.
480 * @since 1.4
481 */
482 public static final byte DIRECTIONALITY_WHITESPACE = 12;
483
484 /**
485 * Neutral bidirectional character type "ON" in the Unicode specification.
486 * @since 1.4
487 */
488 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
489
490 /**
491 * Strong bidirectional character type "LRE" in the Unicode specification.
492 * @since 1.4
493 */
494 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
495
496 /**
497 * Strong bidirectional character type "LRO" in the Unicode specification.
498 * @since 1.4
499 */
500 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
501
502 /**
503 * Strong bidirectional character type "RLE" in the Unicode specification.
504 * @since 1.4
505 */
506 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
507
508 /**
509 * Strong bidirectional character type "RLO" in the Unicode specification.
510 * @since 1.4
511 */
512 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
513
514 /**
515 * Weak bidirectional character type "PDF" in the Unicode specification.
516 * @since 1.4
517 */
518 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
519
520 /**
521 * The minimum value of a Unicode high-surrogate code unit in the
522 * UTF-16 encoding. A high-surrogate is also known as a
523 * <i>leading-surrogate</i>.
524 *
525 * @since 1.5
526 */
527 public static final char MIN_HIGH_SURROGATE = '\uD800';
528
529 /**
530 * The maximum value of a Unicode high-surrogate code unit in the
531 * UTF-16 encoding. A high-surrogate is also known as a
532 * <i>leading-surrogate</i>.
533 *
534 * @since 1.5
535 */
536 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
537
538 /**
539 * The minimum value of a Unicode low-surrogate code unit in the
540 * UTF-16 encoding. A low-surrogate is also known as a
541 * <i>trailing-surrogate</i>.
542 *
543 * @since 1.5
544 */
545 public static final char MIN_LOW_SURROGATE = '\uDC00';
546
547 /**
548 * The maximum value of a Unicode low-surrogate code unit in the
549 * UTF-16 encoding. A low-surrogate is also known as a
550 * <i>trailing-surrogate</i>.
551 *
552 * @since 1.5
553 */
554 public static final char MAX_LOW_SURROGATE = '\uDFFF';
555
556 /**
557 * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
558 *
559 * @since 1.5
560 */
561 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
562
563 /**
564 * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
565 *
566 * @since 1.5
567 */
568 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
569
570 /**
571 * The minimum value of a supplementary code point.
572 *
573 * @since 1.5
574 */
575 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
576
577 /**
578 * The minimum value of a Unicode code point.
579 *
580 * @since 1.5
581 */
582 public static final int MIN_CODE_POINT = 0x000000;
583
584 /**
585 * The maximum value of a Unicode code point.
586 *
587 * @since 1.5
588 */
589 public static final int MAX_CODE_POINT = 0x10ffff;
590
591
592 /**
593 * Instances of this class represent particular subsets of the Unicode
594 * character set. The only family of subsets defined in the
595 * <code>Character</code> class is <code>{@link Character.UnicodeBlock
596 * UnicodeBlock}</code>. Other portions of the Java API may define other
597 * subsets for their own purposes.
598 *
599 * @since 1.2
600 */
601 public static class Subset {
602
603 private String name;
604
605 /**
606 * Constructs a new <code>Subset</code> instance.
607 *
608 * @exception NullPointerException if name is <code>null</code>
609 * @param name The name of this subset
610 */
611 protected Subset(String name) {
612 if (name == null) {
613 throw new NullPointerException("name");
614 }
615 this.name = name;
616 }
617
618 /**
619 * Compares two <code>Subset</code> objects for equality.
620 * This method returns <code>true</code> if and only if
621 * <code>this</code> and the argument refer to the same
622 * object; since this method is <code>final</code>, this
623 * guarantee holds for all subclasses.
624 */
625 public final boolean equals(Object obj) {
626 return (this == obj);
627 }
628
629 /**
630 * Returns the standard hash code as defined by the
631 * <code>{@link Object#hashCode}</code> method. This method
632 * is <code>final</code> in order to ensure that the
633 * <code>equals</code> and <code>hashCode</code> methods will
634 * be consistent in all subclasses.
635 */
636 public final int hashCode() {
637 return super.hashCode();
638 }
639
640 /**
641 * Returns the name of this subset.
642 */
643 public final String toString() {
644 return name;
645 }
646 }
647
648 /**
649 * A family of character subsets representing the character blocks in the
650 * Unicode specification. Character blocks generally define characters
651 * used for a specific script or purpose. A character is contained by
652 * at most one Unicode block.
653 *
654 * @since 1.2
655 */
656 public static final class UnicodeBlock extends Subset {
657
658 private static Map map = new HashMap();
659
660 /**
661 * Create a UnicodeBlock with the given identifier name.
662 * This name must be the same as the block identifier.
663 */
664 private UnicodeBlock(String idName) {
665 super(idName);
666 map.put(idName.toUpperCase(Locale.US), this);
667 }
668
669 /**
670 * Create a UnicodeBlock with the given identifier name and
671 * alias name.
672 */
673 private UnicodeBlock(String idName, String alias) {
674 this(idName);
675 map.put(alias.toUpperCase(Locale.US), this);
676 }
677
/**
 * Creates a block with the given identifier name and additionally
 * records it in the name lookup map under each supplied alias,
 * upper-cased with <code>Locale.US</code>.  A <code>null</code>
 * alias array is accepted and adds no aliases.
 */
private UnicodeBlock(String idName, String[] aliasName) {
    this(idName);
    if (aliasName == null) {
        return;
    }
    for (String alias : aliasName) {
        map.put(alias.toUpperCase(Locale.US), this);
    }
}
690
691 /**
692 * Constant for the "Basic Latin" Unicode character block.
693 * @since 1.2
694 */
695 public static final UnicodeBlock BASIC_LATIN =
696 new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
697
698 /**
699 * Constant for the "Latin-1 Supplement" Unicode character block.
700 * @since 1.2
701 */
702 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
703 new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
704
705 /**
706 * Constant for the "Latin Extended-A" Unicode character block.
707 * @since 1.2
708 */
709 public static final UnicodeBlock LATIN_EXTENDED_A =
710 new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
711
712 /**
713 * Constant for the "Latin Extended-B" Unicode character block.
714 * @since 1.2
715 */
716 public static final UnicodeBlock LATIN_EXTENDED_B =
717 new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
718
719 /**
720 * Constant for the "IPA Extensions" Unicode character block.
721 * @since 1.2
722 */
723 public static final UnicodeBlock IPA_EXTENSIONS =
724 new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
725
726 /**
727 * Constant for the "Spacing Modifier Letters" Unicode character block.
728 * @since 1.2
729 */
730 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
731 new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
732 "SpacingModifierLetters"});
733
734 /**
735 * Constant for the "Combining Diacritical Marks" Unicode character block.
736 * @since 1.2
737 */
738 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
739 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
740 "CombiningDiacriticalMarks" });
741
742 /**
743 * Constant for the "Greek and Coptic" Unicode character block.
744 * <p>
745 * This block was previously known as the "Greek" block.
746 *
747 * @since 1.2
748 */
749 public static final UnicodeBlock GREEK
750 = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
751
752 /**
753 * Constant for the "Cyrillic" Unicode character block.
754 * @since 1.2
755 */
756 public static final UnicodeBlock CYRILLIC =
757 new UnicodeBlock("CYRILLIC");
758
759 /**
760 * Constant for the "Armenian" Unicode character block.
761 * @since 1.2
762 */
763 public static final UnicodeBlock ARMENIAN =
764 new UnicodeBlock("ARMENIAN");
765
766 /**
767 * Constant for the "Hebrew" Unicode character block.
768 * @since 1.2
769 */
770 public static final UnicodeBlock HEBREW =
771 new UnicodeBlock("HEBREW");
772
773 /**
774 * Constant for the "Arabic" Unicode character block.
775 * @since 1.2
776 */
777 public static final UnicodeBlock ARABIC =
778 new UnicodeBlock("ARABIC");
779
780 /**
781 * Constant for the "Devanagari" Unicode character block.
782 * @since 1.2
783 */
784 public static final UnicodeBlock DEVANAGARI =
785 new UnicodeBlock("DEVANAGARI");
786
787 /**
788 * Constant for the "Bengali" Unicode character block.
789 * @since 1.2
790 */
791 public static final UnicodeBlock BENGALI =
792 new UnicodeBlock("BENGALI");
793
794 /**
795 * Constant for the "Gurmukhi" Unicode character block.
796 * @since 1.2
797 */
798 public static final UnicodeBlock GURMUKHI =
799 new UnicodeBlock("GURMUKHI");
800
801 /**
802 * Constant for the "Gujarati" Unicode character block.
803 * @since 1.2
804 */
805 public static final UnicodeBlock GUJARATI =
806 new UnicodeBlock("GUJARATI");
807
808 /**
809 * Constant for the "Oriya" Unicode character block.
810 * @since 1.2
811 */
812 public static final UnicodeBlock ORIYA =
813 new UnicodeBlock("ORIYA");
814
815 /**
816 * Constant for the "Tamil" Unicode character block.
817 * @since 1.2
818 */
819 public static final UnicodeBlock TAMIL =
820 new UnicodeBlock("TAMIL");
821
822 /**
823 * Constant for the "Telugu" Unicode character block.
824 * @since 1.2
825 */
826 public static final UnicodeBlock TELUGU =
827 new UnicodeBlock("TELUGU");
828
829 /**
830 * Constant for the "Kannada" Unicode character block.
831 * @since 1.2
832 */
833 public static final UnicodeBlock KANNADA =
834 new UnicodeBlock("KANNADA");
835
836 /**
837 * Constant for the "Malayalam" Unicode character block.
838 * @since 1.2
839 */
840 public static final UnicodeBlock MALAYALAM =
841 new UnicodeBlock("MALAYALAM");
842
843 /**
844 * Constant for the "Thai" Unicode character block.
845 * @since 1.2
846 */
847 public static final UnicodeBlock THAI =
848 new UnicodeBlock("THAI");
849
850 /**
851 * Constant for the "Lao" Unicode character block.
852 * @since 1.2
853 */
854 public static final UnicodeBlock LAO =
855 new UnicodeBlock("LAO");
856
857 /**
858 * Constant for the "Tibetan" Unicode character block.
859 * @since 1.2
860 */
861 public static final UnicodeBlock TIBETAN =
862 new UnicodeBlock("TIBETAN");
863
864 /**
865 * Constant for the "Georgian" Unicode character block.
866 * @since 1.2
867 */
868 public static final UnicodeBlock GEORGIAN =
869 new UnicodeBlock("GEORGIAN");
870
871 /**
872 * Constant for the "Hangul Jamo" Unicode character block.
873 * @since 1.2
874 */
875 public static final UnicodeBlock HANGUL_JAMO =
876 new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
877
878 /**
879 * Constant for the "Latin Extended Additional" Unicode character block.
880 * @since 1.2
881 */
882 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
883 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
884 "LatinExtendedAdditional"});
885
886 /**
887 * Constant for the "Greek Extended" Unicode character block.
888 * @since 1.2
889 */
890 public static final UnicodeBlock GREEK_EXTENDED =
891 new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
892
893 /**
894 * Constant for the "General Punctuation" Unicode character block.
895 * @since 1.2
896 */
897 public static final UnicodeBlock GENERAL_PUNCTUATION =
898 new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
899
900 /**
901 * Constant for the "Superscripts and Subscripts" Unicode character block.
902 * @since 1.2
903 */
904 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
905 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
906 "SuperscriptsandSubscripts" });
907
908 /**
909 * Constant for the "Currency Symbols" Unicode character block.
910 * @since 1.2
911 */
912 public static final UnicodeBlock CURRENCY_SYMBOLS =
913 new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
914
915 /**
916 * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
917 * <p>
918 * This block was previously known as "Combining Marks for Symbols".
919 * @since 1.2
920 */
921 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
922 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
923 "CombiningDiacriticalMarksforSymbols",
924 "Combining Marks for Symbols",
925 "CombiningMarksforSymbols" });
926
927 /**
928 * Constant for the "Letterlike Symbols" Unicode character block.
929 * @since 1.2
930 */
931 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
932 new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
933
934 /**
935 * Constant for the "Number Forms" Unicode character block.
936 * @since 1.2
937 */
938 public static final UnicodeBlock NUMBER_FORMS =
939 new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
940
941 /**
942 * Constant for the "Arrows" Unicode character block.
943 * @since 1.2
944 */
945 public static final UnicodeBlock ARROWS =
946 new UnicodeBlock("ARROWS");
947
948 /**
949 * Constant for the "Mathematical Operators" Unicode character block.
950 * @since 1.2
951 */
952 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
953 new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
954 "MathematicalOperators"});
955
956 /**
957 * Constant for the "Miscellaneous Technical" Unicode character block.
958 * @since 1.2
959 */
960 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
961 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
962 "MiscellaneousTechnical"});
963
964 /**
965 * Constant for the "Control Pictures" Unicode character block.
966 * @since 1.2
967 */
968 public static final UnicodeBlock CONTROL_PICTURES =
969 new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
970
971 /**
972 * Constant for the "Optical Character Recognition" Unicode character block.
973 * @since 1.2
974 */
975 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
976 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
977 "OpticalCharacterRecognition"});
978
979 /**
980 * Constant for the "Enclosed Alphanumerics" Unicode character block.
981 * @since 1.2
982 */
983 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
984 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
985 "EnclosedAlphanumerics"});
986
987 /**
988 * Constant for the "Box Drawing" Unicode character block.
989 * @since 1.2
990 */
991 public static final UnicodeBlock BOX_DRAWING =
992 new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
993
994 /**
995 * Constant for the "Block Elements" Unicode character block.
996 * @since 1.2
997 */
998 public static final UnicodeBlock BLOCK_ELEMENTS =
999 new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
1000
1001 /**
1002 * Constant for the "Geometric Shapes" Unicode character block.
1003 * @since 1.2
1004 */
1005 public static final UnicodeBlock GEOMETRIC_SHAPES =
1006 new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
1007
1008 /**
1009 * Constant for the "Miscellaneous Symbols" Unicode character block.
1010 * @since 1.2
1011 */
1012 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1013 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
1014 "MiscellaneousSymbols"});
1015
1016 /**
1017 * Constant for the "Dingbats" Unicode character block.
1018 * @since 1.2
1019 */
1020 public static final UnicodeBlock DINGBATS =
1021 new UnicodeBlock("DINGBATS");
1022
1023 /**
1024 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1025 * @since 1.2
1026 */
1027 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1028 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
1029 "CJKSymbolsandPunctuation"});
1030
1031 /**
1032 * Constant for the "Hiragana" Unicode character block.
1033 * @since 1.2
1034 */
1035 public static final UnicodeBlock HIRAGANA =
1036 new UnicodeBlock("HIRAGANA");
1037
1038 /**
1039 * Constant for the "Katakana" Unicode character block.
1040 * @since 1.2
1041 */
1042 public static final UnicodeBlock KATAKANA =
1043 new UnicodeBlock("KATAKANA");
1044
1045 /**
1046 * Constant for the "Bopomofo" Unicode character block.
1047 * @since 1.2
1048 */
1049 public static final UnicodeBlock BOPOMOFO =
1050 new UnicodeBlock("BOPOMOFO");
1051
1052 /**
1053 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1054 * @since 1.2
1055 */
1056 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1057 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
1058 "HangulCompatibilityJamo"});
1059
1060 /**
1061 * Constant for the "Kanbun" Unicode character block.
1062 * @since 1.2
1063 */
1064 public static final UnicodeBlock KANBUN =
1065 new UnicodeBlock("KANBUN");
1066
1067 /**
1068 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1069 * @since 1.2
1070 */
1071 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1072 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
1073 "EnclosedCJKLettersandMonths"});
1074
1075 /**
1076 * Constant for the "CJK Compatibility" Unicode character block.
1077 * @since 1.2
1078 */
1079 public static final UnicodeBlock CJK_COMPATIBILITY =
1080 new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
1081
1082 /**
1083 * Constant for the "CJK Unified Ideographs" Unicode character block.
1084 * @since 1.2
1085 */
1086 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1087 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
1088 "CJKUnifiedIdeographs"});
1089
1090 /**
1091 * Constant for the "Hangul Syllables" Unicode character block.
1092 * @since 1.2
1093 */
1094 public static final UnicodeBlock HANGUL_SYLLABLES =
1095 new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
1096
1097 /**
1098 * Constant for the "Private Use Area" Unicode character block.
1099 * @since 1.2
1100 */
1101 public static final UnicodeBlock PRIVATE_USE_AREA =
1102 new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
1103
1104 /**
1105 * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1106 * @since 1.2
1107 */
1108 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1109 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1110 new String[] {"CJK Compatibility Ideographs",
1111 "CJKCompatibilityIdeographs"});
1112
1113 /**
1114 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1115 * @since 1.2
1116 */
1117 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1118 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
1119 "AlphabeticPresentationForms"});
1120
1121 /**
1122 * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1123 * @since 1.2
1124 */
1125 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1126 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
1127 "ArabicPresentationForms-A"});
1128
1129 /**
1130 * Constant for the "Combining Half Marks" Unicode character block.
1131 * @since 1.2
1132 */
1133 public static final UnicodeBlock COMBINING_HALF_MARKS =
1134 new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
1135 "CombiningHalfMarks"});
1136
1137 /**
1138 * Constant for the "CJK Compatibility Forms" Unicode character block.
1139 * @since 1.2
1140 */
1141 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1142 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
1143 "CJKCompatibilityForms"});
1144
1145 /**
1146 * Constant for the "Small Form Variants" Unicode character block.
1147 * @since 1.2
1148 */
1149 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1150 new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
1151 "SmallFormVariants"});
1152
1153 /**
1154 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1155 * @since 1.2
1156 */
1157 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1158 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
1159 "ArabicPresentationForms-B"});
1160
1161 /**
1162 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1163 * @since 1.2
1164 */
1165 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1166 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1167 new String[] {"Halfwidth and Fullwidth Forms",
1168 "HalfwidthandFullwidthForms"});
1169
1170 /**
1171 * Constant for the "Specials" Unicode character block.
1172 * @since 1.2
1173 */
1174 public static final UnicodeBlock SPECIALS =
1175 new UnicodeBlock("SPECIALS");
1176
1177 /**
1178 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1179 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1180 * {@link #LOW_SURROGATES}. These new constants match
1181 * the block definitions of the Unicode Standard.
1182 * The {@link #of(char)} and {@link #of(int)} methods
1183 * return the new constants, not SURROGATES_AREA.
1184 */
1185 @Deprecated
1186 public static final UnicodeBlock SURROGATES_AREA =
1187 new UnicodeBlock("SURROGATES_AREA");
1188
1189 /**
1190 * Constant for the "Syriac" Unicode character block.
1191 * @since 1.4
1192 */
1193 public static final UnicodeBlock SYRIAC =
1194 new UnicodeBlock("SYRIAC");
1195
1196 /**
1197 * Constant for the "Thaana" Unicode character block.
1198 * @since 1.4
1199 */
1200 public static final UnicodeBlock THAANA =
1201 new UnicodeBlock("THAANA");
1202
1203 /**
1204 * Constant for the "Sinhala" Unicode character block.
1205 * @since 1.4
1206 */
1207 public static final UnicodeBlock SINHALA =
1208 new UnicodeBlock("SINHALA");
1209
1210 /**
1211 * Constant for the "Myanmar" Unicode character block.
1212 * @since 1.4
1213 */
1214 public static final UnicodeBlock MYANMAR =
1215 new UnicodeBlock("MYANMAR");
1216
1217 /**
1218 * Constant for the "Ethiopic" Unicode character block.
1219 * @since 1.4
1220 */
1221 public static final UnicodeBlock ETHIOPIC =
1222 new UnicodeBlock("ETHIOPIC");
1223
1224 /**
1225 * Constant for the "Cherokee" Unicode character block.
1226 * @since 1.4
1227 */
1228 public static final UnicodeBlock CHEROKEE =
1229 new UnicodeBlock("CHEROKEE");
1230
1231 /**
1232 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1233 * @since 1.4
1234 */
1235 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1236 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1237 new String[] {"Unified Canadian Aboriginal Syllabics",
1238 "UnifiedCanadianAboriginalSyllabics"});
1239
1240 /**
1241 * Constant for the "Ogham" Unicode character block.
1242 * @since 1.4
1243 */
1244 public static final UnicodeBlock OGHAM =
1245 new UnicodeBlock("OGHAM");
1246
1247 /**
1248 * Constant for the "Runic" Unicode character block.
1249 * @since 1.4
1250 */
1251 public static final UnicodeBlock RUNIC =
1252 new UnicodeBlock("RUNIC");
1253
1254 /**
1255 * Constant for the "Khmer" Unicode character block.
1256 * @since 1.4
1257 */
1258 public static final UnicodeBlock KHMER =
1259 new UnicodeBlock("KHMER");
1260
1261 /**
1262 * Constant for the "Mongolian" Unicode character block.
1263 * @since 1.4
1264 */
1265 public static final UnicodeBlock MONGOLIAN =
1266 new UnicodeBlock("MONGOLIAN");
1267
1268 /**
1269 * Constant for the "Braille Patterns" Unicode character block.
1270 * @since 1.4
1271 */
1272 public static final UnicodeBlock BRAILLE_PATTERNS =
1273 new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
1274 "BraillePatterns"});
1275
1276 /**
1277 * Constant for the "CJK Radicals Supplement" Unicode character block.
1278 * @since 1.4
1279 */
1280 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1281 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
1282 "CJKRadicalsSupplement"});
1283
1284 /**
1285 * Constant for the "Kangxi Radicals" Unicode character block.
1286 * @since 1.4
1287 */
1288 public static final UnicodeBlock KANGXI_RADICALS =
1289 new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
1290
1291 /**
1292 * Constant for the "Ideographic Description Characters" Unicode character block.
1293 * @since 1.4
1294 */
1295 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1296 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
1297 "IdeographicDescriptionCharacters"});
1298
1299 /**
1300 * Constant for the "Bopomofo Extended" Unicode character block.
1301 * @since 1.4
1302 */
1303 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1304 new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
1305 "BopomofoExtended"});
1306
1307 /**
1308 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1309 * @since 1.4
1310 */
1311 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1312 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
1313 "CJKUnifiedIdeographsExtensionA"});
1314
1315 /**
1316 * Constant for the "Yi Syllables" Unicode character block.
1317 * @since 1.4
1318 */
1319 public static final UnicodeBlock YI_SYLLABLES =
1320 new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
1321
1322 /**
1323 * Constant for the "Yi Radicals" Unicode character block.
1324 * @since 1.4
1325 */
1326 public static final UnicodeBlock YI_RADICALS =
1327 new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
1328
1329
1330 /**
1331 * Constant for the "Cyrillic Supplementary" Unicode character block.
1332 * @since 1.5
1333 */
1334 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1335 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", new String[] {"Cyrillic Supplementary",
1336 "CyrillicSupplementary"});
1337
1338 /**
1339 * Constant for the "Tagalog" Unicode character block.
1340 * @since 1.5
1341 */
1342 public static final UnicodeBlock TAGALOG =
1343 new UnicodeBlock("TAGALOG");
1344
1345 /**
1346 * Constant for the "Hanunoo" Unicode character block.
1347 * @since 1.5
1348 */
1349 public static final UnicodeBlock HANUNOO =
1350 new UnicodeBlock("HANUNOO");
1351
1352 /**
1353 * Constant for the "Buhid" Unicode character block.
1354 * @since 1.5
1355 */
1356 public static final UnicodeBlock BUHID =
1357 new UnicodeBlock("BUHID");
1358
1359 /**
1360 * Constant for the "Tagbanwa" Unicode character block.
1361 * @since 1.5
1362 */
1363 public static final UnicodeBlock TAGBANWA =
1364 new UnicodeBlock("TAGBANWA");
1365
1366 /**
1367 * Constant for the "Limbu" Unicode character block.
1368 * @since 1.5
1369 */
1370 public static final UnicodeBlock LIMBU =
1371 new UnicodeBlock("LIMBU");
1372
1373 /**
1374 * Constant for the "Tai Le" Unicode character block.
1375 * @since 1.5
1376 */
1377 public static final UnicodeBlock TAI_LE =
1378 new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
1379
1380 /**
1381 * Constant for the "Khmer Symbols" Unicode character block.
1382 * @since 1.5
1383 */
1384 public static final UnicodeBlock KHMER_SYMBOLS =
1385 new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
1386
1387 /**
1388 * Constant for the "Phonetic Extensions" Unicode character block.
1389 * @since 1.5
1390 */
1391 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1392 new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
1393
1394 /**
1395 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1396 * @since 1.5
1397 */
1398 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1399 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1400 new String[]{"Miscellaneous Mathematical Symbols-A",
1401 "MiscellaneousMathematicalSymbols-A"});
1402
1403 /**
1404 * Constant for the "Supplemental Arrows-A" Unicode character block.
1405 * @since 1.5
1406 */
1407 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1408 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
1409 "SupplementalArrows-A"});
1410
1411 /**
1412 * Constant for the "Supplemental Arrows-B" Unicode character block.
1413 * @since 1.5
1414 */
1415 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1416 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
1417 "SupplementalArrows-B"});
1418
1419 /**
1420 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1421 * @since 1.5
1422 */
1423 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1424 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1425 new String[] {"Miscellaneous Mathematical Symbols-B",
1426 "MiscellaneousMathematicalSymbols-B"});
1427
1428 /**
1429 * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1430 * @since 1.5
1431 */
1432 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1433 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1434 new String[]{"Supplemental Mathematical Operators",
1435 "SupplementalMathematicalOperators"} );
1436
1437 /**
1438 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1439 * @since 1.5
1440 */
1441 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1442 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
1443 "MiscellaneousSymbolsandArrows"});
1444
1445 /**
1446 * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1447 * @since 1.5
1448 */
1449 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1450 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
1451 "KatakanaPhoneticExtensions"});
1452
1453 /**
1454 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1455 * @since 1.5
1456 */
1457 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1458 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
1459 "YijingHexagramSymbols"});
1460
1461 /**
1462 * Constant for the "Variation Selectors" Unicode character block.
1463 * @since 1.5
1464 */
1465 public static final UnicodeBlock VARIATION_SELECTORS =
1466 new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
1467
1468 /**
1469 * Constant for the "Linear B Syllabary" Unicode character block.
1470 * @since 1.5
1471 */
1472 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1473 new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
1474
1475 /**
1476 * Constant for the "Linear B Ideograms" Unicode character block.
1477 * @since 1.5
1478 */
1479 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1480 new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
1481
1482 /**
1483 * Constant for the "Aegean Numbers" Unicode character block.
1484 * @since 1.5
1485 */
1486 public static final UnicodeBlock AEGEAN_NUMBERS =
1487 new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
1488
1489 /**
1490 * Constant for the "Old Italic" Unicode character block.
1491 * @since 1.5
1492 */
1493 public static final UnicodeBlock OLD_ITALIC =
1494 new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
1495
1496 /**
1497 * Constant for the "Gothic" Unicode character block.
1498 * @since 1.5
1499 */
1500 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1501
1502 /**
1503 * Constant for the "Ugaritic" Unicode character block.
1504 * @since 1.5
1505 */
1506 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1507
1508 /**
1509 * Constant for the "Deseret" Unicode character block.
1510 * @since 1.5
1511 */
1512 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1513
1514 /**
1515 * Constant for the "Shavian" Unicode character block.
1516 * @since 1.5
1517 */
1518 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1519
1520 /**
1521 * Constant for the "Osmanya" Unicode character block.
1522 * @since 1.5
1523 */
1524 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1525
1526 /**
1527 * Constant for the "Cypriot Syllabary" Unicode character block.
1528 * @since 1.5
1529 */
1530 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1531 new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
1532
1533 /**
1534 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1535 * @since 1.5
1536 */
1537 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1538 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
1539 "ByzantineMusicalSymbols"});
1540
1541 /**
1542 * Constant for the "Musical Symbols" Unicode character block.
1543 * @since 1.5
1544 */
1545 public static final UnicodeBlock MUSICAL_SYMBOLS =
1546 new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
1547
1548 /**
1549 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1550 * @since 1.5
1551 */
1552 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1553 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
1554 "TaiXuanJingSymbols"});
1555
1556 /**
1557 * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block.
1558 * @since 1.5
1559 */
1560 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1561 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1562 new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
1563
1564 /**
1565 * Constant for the "CJK Unified Ideographs Extension B" Unicode character block.
1566 * @since 1.5
1567 */
1568 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1569 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1570 new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
1571
1572 /**
1573 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1574 * @since 1.5
1575 */
1576 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1577 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1578 new String[]{"CJK Compatibility Ideographs Supplement",
1579 "CJKCompatibilityIdeographsSupplement"});
1580
1581 /**
1582 * Constant for the "Tags" Unicode character block.
1583 * @since 1.5
1584 */
1585 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1586
1587 /**
1588 * Constant for the "Variation Selectors Supplement" Unicode character block.
1589 * @since 1.5
1590 */
1591 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1592 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
1593 "VariationSelectorsSupplement"});
1594
1595 /**
1596 * Constant for the "Supplementary Private Use Area-A" Unicode character block.
1597 * @since 1.5
1598 */
1599 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1600 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1601 new String[] {"Supplementary Private Use Area-A",
1602 "SupplementaryPrivateUseArea-A"});
1603
1604 /**
1605 * Constant for the "Supplementary Private Use Area-B" Unicode character block.
1606 * @since 1.5
1607 */
1608 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1609 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1610 new String[] {"Supplementary Private Use Area-B",
1611 "SupplementaryPrivateUseArea-B"});
1612
1613 /**
1614 * Constant for the "High Surrogates" Unicode character block.
1615 * This block represents codepoint values in the high surrogate
1616 * range: 0xD800 through 0xDB7F
1617 *
1618 * @since 1.5
1619 */
1620 public static final UnicodeBlock HIGH_SURROGATES =
1621 new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
1622
1623 /**
1624 * Constant for the "High Private Use Surrogates" Unicode character block.
1625 * This block represents codepoint values in the high surrogate
1626 * range: 0xDB80 through 0xDBFF
1627 *
1628 * @since 1.5
1629 */
1630 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1631 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
1632 "HighPrivateUseSurrogates"});
1633
1634 /**
1635 * Constant for the "Low Surrogates" Unicode character block.
1636 * This block represents codepoint values in the high surrogate
1637 * range: 0xDC00 through 0xDFFF
1638 *
1639 * @since 1.5
1640 */
1641 public static final UnicodeBlock LOW_SURROGATES =
1642 new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
1643
1644 private static final int blockStarts[] = {
1645 0x0000, // Basic Latin
1646 0x0080, // Latin-1 Supplement
1647 0x0100, // Latin Extended-A
1648 0x0180, // Latin Extended-B
1649 0x0250, // IPA Extensions
1650 0x02B0, // Spacing Modifier Letters
1651 0x0300, // Combining Diacritical Marks
1652 0x0370, // Greek and Coptic
1653 0x0400, // Cyrillic
1654 0x0500, // Cyrillic Supplementary
1655 0x0530, // Armenian
1656 0x0590, // Hebrew
1657 0x0600, // Arabic
1658 0x0700, // Syriac
1659 0x0750, // unassigned
1660 0x0780, // Thaana
1661 0x07C0, // unassigned
1662 0x0900, // Devanagari
1663 0x0980, // Bengali
1664 0x0A00, // Gurmukhi
1665 0x0A80, // Gujarati
1666 0x0B00, // Oriya
1667 0x0B80, // Tamil
1668 0x0C00, // Telugu
1669 0x0C80, // Kannada
1670 0x0D00, // Malayalam
1671 0x0D80, // Sinhala
1672 0x0E00, // Thai
1673 0x0E80, // Lao
1674 0x0F00, // Tibetan
1675 0x1000, // Myanmar
1676 0x10A0, // Georgian
1677 0x1100, // Hangul Jamo
1678 0x1200, // Ethiopic
1679 0x1380, // unassigned
1680 0x13A0, // Cherokee
1681 0x1400, // Unified Canadian Aboriginal Syllabics
1682 0x1680, // Ogham
1683 0x16A0, // Runic
1684 0x1700, // Tagalog
1685 0x1720, // Hanunoo
1686 0x1740, // Buhid
1687 0x1760, // Tagbanwa
1688 0x1780, // Khmer
1689 0x1800, // Mongolian
1690 0x18B0, // unassigned
1691 0x1900, // Limbu
1692 0x1950, // Tai Le
1693 0x1980, // unassigned
1694 0x19E0, // Khmer Symbols
1695 0x1A00, // unassigned
1696 0x1D00, // Phonetic Extensions
1697 0x1D80, // unassigned
1698 0x1E00, // Latin Extended Additional
1699 0x1F00, // Greek Extended
1700 0x2000, // General Punctuation
1701 0x2070, // Superscripts and Subscripts
1702 0x20A0, // Currency Symbols
1703 0x20D0, // Combining Diacritical Marks for Symbols
1704 0x2100, // Letterlike Symbols
1705 0x2150, // Number Forms
1706 0x2190, // Arrows
1707 0x2200, // Mathematical Operators
1708 0x2300, // Miscellaneous Technical
1709 0x2400, // Control Pictures
1710 0x2440, // Optical Character Recognition
1711 0x2460, // Enclosed Alphanumerics
1712 0x2500, // Box Drawing
1713 0x2580, // Block Elements
1714 0x25A0, // Geometric Shapes
1715 0x2600, // Miscellaneous Symbols
1716 0x2700, // Dingbats
1717 0x27C0, // Miscellaneous Mathematical Symbols-A
1718 0x27F0, // Supplemental Arrows-A
1719 0x2800, // Braille Patterns
1720 0x2900, // Supplemental Arrows-B
1721 0x2980, // Miscellaneous Mathematical Symbols-B
1722 0x2A00, // Supplemental Mathematical Operators
1723 0x2B00, // Miscellaneous Symbols and Arrows
1724 0x2C00, // unassigned
1725 0x2E80, // CJK Radicals Supplement
1726 0x2F00, // Kangxi Radicals
1727 0x2FE0, // unassigned
1728 0x2FF0, // Ideographic Description Characters
1729 0x3000, // CJK Symbols and Punctuation
1730 0x3040, // Hiragana
1731 0x30A0, // Katakana
1732 0x3100, // Bopomofo
1733 0x3130, // Hangul Compatibility Jamo
1734 0x3190, // Kanbun
1735 0x31A0, // Bopomofo Extended
1736 0x31C0, // unassigned
1737 0x31F0, // Katakana Phonetic Extensions
1738 0x3200, // Enclosed CJK Letters and Months
1739 0x3300, // CJK Compatibility
1740 0x3400, // CJK Unified Ideographs Extension A
1741 0x4DC0, // Yijing Hexagram Symbols
1742 0x4E00, // CJK Unified Ideographs
1743 0xA000, // Yi Syllables
1744 0xA490, // Yi Radicals
1745 0xA4D0, // unassigned
1746 0xAC00, // Hangul Syllables
1747 0xD7B0, // unassigned
1748 0xD800, // High Surrogates
1749 0xDB80, // High Private Use Surrogates
1750 0xDC00, // Low Surrogates
1751 0xE000, // Private Use
1752 0xF900, // CJK Compatibility Ideographs
1753 0xFB00, // Alphabetic Presentation Forms
1754 0xFB50, // Arabic Presentation Forms-A
1755 0xFE00, // Variation Selectors
1756 0xFE10, // unassigned
1757 0xFE20, // Combining Half Marks
1758 0xFE30, // CJK Compatibility Forms
1759 0xFE50, // Small Form Variants
1760 0xFE70, // Arabic Presentation Forms-B
1761 0xFF00, // Halfwidth and Fullwidth Forms
1762 0xFFF0, // Specials
1763 0x10000, // Linear B Syllabary
1764 0x10080, // Linear B Ideograms
1765 0x10100, // Aegean Numbers
1766 0x10140, // unassigned
1767 0x10300, // Old Italic
1768 0x10330, // Gothic
1769 0x10350, // unassigned
1770 0x10380, // Ugaritic
1771 0x103A0, // unassigned
1772 0x10400, // Deseret
1773 0x10450, // Shavian
1774 0x10480, // Osmanya
1775 0x104B0, // unassigned
1776 0x10800, // Cypriot Syllabary
1777 0x10840, // unassigned
1778 0x1D000, // Byzantine Musical Symbols
1779 0x1D100, // Musical Symbols
1780 0x1D200, // unassigned
1781 0x1D300, // Tai Xuan Jing Symbols
1782 0x1D360, // unassigned
1783 0x1D400, // Mathematical Alphanumeric Symbols
1784 0x1D800, // unassigned
1785 0x20000, // CJK Unified Ideographs Extension B
1786 0x2A6E0, // unassigned
1787 0x2F800, // CJK Compatibility Ideographs Supplement
1788 0x2FA20, // unassigned
1789 0xE0000, // Tags
1790 0xE0080, // unassigned
1791 0xE0100, // Variation Selectors Supplement
1792 0xE01F0, // unassigned
1793 0xF0000, // Supplementary Private Use Area-A
1794 0x100000, // Supplementary Private Use Area-B
1795 };
1796
1797 private static final UnicodeBlock[] blocks = {
1798 BASIC_LATIN,
1799 LATIN_1_SUPPLEMENT,
1800 LATIN_EXTENDED_A,
1801 LATIN_EXTENDED_B,
1802 IPA_EXTENSIONS,
1803 SPACING_MODIFIER_LETTERS,
1804 COMBINING_DIACRITICAL_MARKS,
1805 GREEK,
1806 CYRILLIC,
1807 CYRILLIC_SUPPLEMENTARY,
1808 ARMENIAN,
1809 HEBREW,
1810 ARABIC,
1811 SYRIAC,
1812 null,
1813 THAANA,
1814 null,
1815 DEVANAGARI,
1816 BENGALI,
1817 GURMUKHI,
1818 GUJARATI,
1819 ORIYA,
1820 TAMIL,
1821 TELUGU,
1822 KANNADA,
1823 MALAYALAM,
1824 SINHALA,
1825 THAI,
1826 LAO,
1827 TIBETAN,
1828 MYANMAR,
1829 GEORGIAN,
1830 HANGUL_JAMO,
1831 ETHIOPIC,
1832 null,
1833 CHEROKEE,
1834 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1835 OGHAM,
1836 RUNIC,
1837 TAGALOG,
1838 HANUNOO,
1839 BUHID,
1840 TAGBANWA,
1841 KHMER,
1842 MONGOLIAN,
1843 null,
1844 LIMBU,
1845 TAI_LE,
1846 null,
1847 KHMER_SYMBOLS,
1848 null,
1849 PHONETIC_EXTENSIONS,
1850 null,
1851 LATIN_EXTENDED_ADDITIONAL,
1852 GREEK_EXTENDED,
1853 GENERAL_PUNCTUATION,
1854 SUPERSCRIPTS_AND_SUBSCRIPTS,
1855 CURRENCY_SYMBOLS,
1856 COMBINING_MARKS_FOR_SYMBOLS,
1857 LETTERLIKE_SYMBOLS,
1858 NUMBER_FORMS,
1859 ARROWS,
1860 MATHEMATICAL_OPERATORS,
1861 MISCELLANEOUS_TECHNICAL,
1862 CONTROL_PICTURES,
1863 OPTICAL_CHARACTER_RECOGNITION,
1864 ENCLOSED_ALPHANUMERICS,
1865 BOX_DRAWING,
1866 BLOCK_ELEMENTS,
1867 GEOMETRIC_SHAPES,
1868 MISCELLANEOUS_SYMBOLS,
1869 DINGBATS,
1870 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1871 SUPPLEMENTAL_ARROWS_A,
1872 BRAILLE_PATTERNS,
1873 SUPPLEMENTAL_ARROWS_B,
1874 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1875 SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1876 MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1877 null,
1878 CJK_RADICALS_SUPPLEMENT,
1879 KANGXI_RADICALS,
1880 null,
1881 IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1882 CJK_SYMBOLS_AND_PUNCTUATION,
1883 HIRAGANA,
1884 KATAKANA,
1885 BOPOMOFO,
1886 HANGUL_COMPATIBILITY_JAMO,
1887 KANBUN,
1888 BOPOMOFO_EXTENDED,
1889 null,
1890 KATAKANA_PHONETIC_EXTENSIONS,
1891 ENCLOSED_CJK_LETTERS_AND_MONTHS,
1892 CJK_COMPATIBILITY,
1893 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1894 YIJING_HEXAGRAM_SYMBOLS,
1895 CJK_UNIFIED_IDEOGRAPHS,
1896 YI_SYLLABLES,
1897 YI_RADICALS,
1898 null,
1899 HANGUL_SYLLABLES,
1900 null,
1901 HIGH_SURROGATES,
1902 HIGH_PRIVATE_USE_SURROGATES,
1903 LOW_SURROGATES,
1904 PRIVATE_USE_AREA,
1905 CJK_COMPATIBILITY_IDEOGRAPHS,
1906 ALPHABETIC_PRESENTATION_FORMS,
1907 ARABIC_PRESENTATION_FORMS_A,
1908 VARIATION_SELECTORS,
1909 null,
1910 COMBINING_HALF_MARKS,
1911 CJK_COMPATIBILITY_FORMS,
1912 SMALL_FORM_VARIANTS,
1913 ARABIC_PRESENTATION_FORMS_B,
1914 HALFWIDTH_AND_FULLWIDTH_FORMS,
1915 SPECIALS,
1916 LINEAR_B_SYLLABARY,
1917 LINEAR_B_IDEOGRAMS,
1918 AEGEAN_NUMBERS,
1919 null,
1920 OLD_ITALIC,
1921 GOTHIC,
1922 null,
1923 UGARITIC,
1924 null,
1925 DESERET,
1926 SHAVIAN,
1927 OSMANYA,
1928 null,
1929 CYPRIOT_SYLLABARY,
1930 null,
1931 BYZANTINE_MUSICAL_SYMBOLS,
1932 MUSICAL_SYMBOLS,
1933 null,
1934 TAI_XUAN_JING_SYMBOLS,
1935 null,
1936 MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1937 null,
1938 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1939 null,
1940 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1941 null,
1942 TAGS,
1943 null,
1944 VARIATION_SELECTORS_SUPPLEMENT,
1945 null,
1946 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1947 SUPPLEMENTARY_PRIVATE_USE_AREA_B
1948 };
1949
1950
1951 /**
1952 * Returns the object representing the Unicode block containing the
1953 * given character, or <code>null</code> if the character is not a
1954 * member of a defined block.
1955 *
1956 * <p><b>Note:</b> This method cannot handle <a
1957 * href="Character.html#supplementary"> supplementary
1958 * characters</a>. To support all Unicode characters,
1959 * including supplementary characters, use the {@link
1960 * #of(int)} method.
1961 *
1962 * @param c The character in question
1963 * @return The <code>UnicodeBlock</code> instance representing the
1964 * Unicode block of which this character is a member, or
1965 * <code>null</code> if the character is not a member of any
1966 * Unicode block
1967 */
1968 public static UnicodeBlock of(char c) {
1969 return of((int)c);
1970 }
1971
1972
1973 /**
1974 * Returns the object representing the Unicode block
1975 * containing the given character (Unicode code point), or
1976 * <code>null</code> if the character is not a member of a
1977 * defined block.
1978 *
1979 * @param codePoint the character (Unicode code point) in question.
1980 * @return The <code>UnicodeBlock</code> instance representing the
1981 * Unicode block of which this character is a member, or
1982 * <code>null</code> if the character is not a member of any
1983 * Unicode block
1984 * @exception IllegalArgumentException if the specified
1985 * <code>codePoint</code> is an invalid Unicode code point.
1986 * @see Character#isValidCodePoint(int)
1987 * @since 1.5
1988 */
1989 public static UnicodeBlock of(int codePoint) {
1990 if (!isValidCodePoint(codePoint)) {
1991 throw new IllegalArgumentException();
1992 }
1993
1994 int top, bottom, current;
1995 bottom = 0;
1996 top = blockStarts.length;
1997 current = top/2;
1998
1999 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
2000 while (top - bottom > 1) {
2001 if (codePoint >= blockStarts[current]) {
2002 bottom = current;
2003 } else {
2004 top = current;
2005 }
2006 current = (top + bottom) / 2;
2007 }
2008 return blocks[current];
2009 }
2010
2011 /**
2012 * Returns the UnicodeBlock with the given name. Block
2013 * names are determined by The Unicode Standard. The file
2014 * Blocks-<version>.txt defines blocks for a particular
2015 * version of the standard. The {@link Character} class specifies
2016 * the version of the standard that it supports.
2017 * <p>
2018 * This method accepts block names in the following forms:
2019 * <ol>
2020 * <li> Canonical block names as defined by the Unicode Standard.
2021 * For example, the standard defines a "Basic Latin" block. Therefore, this
2022 * method accepts "Basic Latin" as a valid block name. The documentation of
2023 * each UnicodeBlock provides the canonical name.
2024 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2025 * is a valid block name for the "Basic Latin" block.
2026 * <li>The text representation of each constant UnicodeBlock identifier.
2027 * For example, this method will return the {@link #BASIC_LATIN} block if
2028 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2029 * hyphens in the canonical name with underscores.
2030 * </ol>
2031 * Finally, character case is ignored for all of the valid block name forms.
2032 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2033 * The en_US locale's case mapping rules are used to provide case-insensitive
2034 * string comparisons for block name validation.
2035 * <p>
2036 * If the Unicode Standard changes block names, both the previous and
2037 * current names will be accepted.
2038 *
2039 * @param blockName A <code>UnicodeBlock</code> name.
2040 * @return The <code>UnicodeBlock</code> instance identified
2041 * by <code>blockName</code>
2042 * @throws IllegalArgumentException if <code>blockName</code> is an
2043 * invalid name
2044 * @throws NullPointerException if <code>blockName</code> is null
2045 * @since 1.5
2046 */
2047 public static final UnicodeBlock forName(String blockName) {
2048 UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2049 if (block == null) {
2050 throw new IllegalArgumentException();
2051 }
2052 return block;
2053 }
2054 }
2055
2056
/**
 * The primitive <code>char</code> wrapped by this <code>Character</code>.
 *
 * @serial
 */
private final char value;

/** use serialVersionUID from JDK 1.0.2 for interoperability */
private static final long serialVersionUID = 3786198910865385080L;

/**
 * Creates a new <code>Character</code> object holding the given
 * <code>char</code> value.
 *
 * @param  value   the <code>char</code> that the new object
 *                  will represent.
 */
public Character(char value) {
    this.value = value;
}
2077
2078 private static class CharacterCache {
2079 private CharacterCache(){}
2080
2081 static final Character cache[] = new Character[127 + 1];
2082
2083 static {
2084 for(int i = 0; i < cache.length; i++)
2085 cache[i] = new Character((char)i);
2086 }
2087 }
2088
2089 /**
2090 * Returns a <tt>Character</tt> instance representing the specified
2091 * <tt>char</tt> value.
2092 * If a new <tt>Character</tt> instance is not required, this method
2093 * should generally be used in preference to the constructor
2094 * {@link #Character(char)}, as this method is likely to yield
2095 * significantly better space and time performance by caching
2096 * frequently requested values.
2097 *
2098 * @param c a char value.
2099 * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2100 * @since 1.5
2101 */
2102 public static Character valueOf(char c) {
2103 if(c <= 127) { // must cache
2104 return CharacterCache.cache[(int)c];
2105 }
2106 return new Character(c);
2107 }
2108
2109 /**
2110 * Returns the value of this <code>Character</code> object.
2111 * @return the primitive <code>char</code> value represented by
2112 * this object.
2113 */
2114 public char charValue() {
2115 return value;
2116 }
2117
2118 /**
2119 * Returns a hash code for this <code>Character</code>.
2120 * @return a hash code value for this object.
2121 */
2122 public int hashCode() {
2123 return (int)value;
2124 }
2125
2126 /**
2127 * Compares this object against the specified object.
2128 * The result is <code>true</code> if and only if the argument is not
2129 * <code>null</code> and is a <code>Character</code> object that
2130 * represents the same <code>char</code> value as this object.
2131 *
2132 * @param obj the object to compare with.
2133 * @return <code>true</code> if the objects are the same;
2134 * <code>false</code> otherwise.
2135 */
2136 public boolean equals(Object obj) {
2137 if (obj instanceof Character) {
2138 return value == ((Character)obj).charValue();
2139 }
2140 return false;
2141 }
2142
2143 /**
2144 * Returns a <code>String</code> object representing this
2145 * <code>Character</code>'s value. The result is a string of
2146 * length 1 whose sole component is the primitive
2147 * <code>char</code> value represented by this
2148 * <code>Character</code> object.
2149 *
2150 * @return a string representation of this object.
2151 */
2152 public String toString() {
2153 char buf[] = {value};
2154 return String.valueOf(buf);
2155 }
2156
2157 /**
2158 * Returns a <code>String</code> object representing the
2159 * specified <code>char</code>. The result is a string of length
2160 * 1 consisting solely of the specified <code>char</code>.
2161 *
2162 * @param c the <code>char</code> to be converted
2163 * @return the string representation of the specified <code>char</code>
2164 * @since 1.4
2165 */
2166 public static String toString(char c) {
2167 return String.valueOf(c);
2168 }
2169
2170 /**
2171 * Determines whether the specified code point is a valid Unicode
2172 * code point value in the range of <code>0x0000</code> to
2173 * <code>0x10FFFF</code> inclusive. This method is equivalent to
2174 * the expression:
2175 *
2176 * <blockquote><pre>
2177 * codePoint >= 0x0000 && codePoint <= 0x10FFFF
2178 * </pre></blockquote>
2179 *
2180 * @param codePoint the Unicode code point to be tested
2181 * @return <code>true</code> if the specified code point value
2182 * is a valid code point value;
2183 * <code>false</code> otherwise.
2184 * @since 1.5
2185 */
2186 public static boolean isValidCodePoint(int codePoint) {
2187 return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2188 }
2189
2190 /**
2191 * Determines whether the specified character (Unicode code point)
2192 * is in the supplementary character range. The method call is
2193 * equivalent to the expression:
2194 * <blockquote><pre>
2195 * codePoint >= 0x10000 && codePoint <= 0x10FFFF
2196 * </pre></blockquote>
2197 *
2198 * @param codePoint the character (Unicode code point) to be tested
2199 * @return <code>true</code> if the specified character is in the Unicode
2200 * supplementary character range; <code>false</code> otherwise.
2201 * @since 1.5
2202 */
2203 public static boolean isSupplementaryCodePoint(int codePoint) {
2204 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2205 && codePoint <= MAX_CODE_POINT;
2206 }
2207
2208 /**
2209 * Determines if the given <code>char</code> value is a
2210 * high-surrogate code unit (also known as <i>leading-surrogate
2211 * code unit</i>). Such values do not represent characters by
2212 * themselves, but are used in the representation of <a
2213 * href="#supplementary">supplementary characters</a> in the
2214 * UTF-16 encoding.
2215 *
2216 * <p>This method returns <code>true</code> if and only if
2217 * <blockquote><pre>ch >= '\uD800' && ch <= '\uDBFF'
2218 * </pre></blockquote>
2219 * is <code>true</code>.
2220 *
2221 * @param ch the <code>char</code> value to be tested.
2222 * @return <code>true</code> if the <code>char</code> value
2223 * is between '\uD800' and '\uDBFF' inclusive;
2224 * <code>false</code> otherwise.
2225 * @see java.lang.Character#isLowSurrogate(char)
2226 * @see Character.UnicodeBlock#of(int)
2227 * @since 1.5
2228 */
2229 public static boolean isHighSurrogate(char ch) {
2230 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2231 }
2232
2233 /**
2234 * Determines if the given <code>char</code> value is a
2235 * low-surrogate code unit (also known as <i>trailing-surrogate code
2236 * unit</i>). Such values do not represent characters by themselves,
2237 * but are used in the representation of <a
2238 * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2239 *
2240 * <p> This method returns <code>true</code> if and only if
2241 * <blockquote><pre>ch >= '\uDC00' && ch <= '\uDFFF'
2242 * </pre></blockquote> is <code>true</code>.
2243 *
2244 * @param ch the <code>char</code> value to be tested.
2245 * @return <code>true</code> if the <code>char</code> value
2246 * is between '\uDC00' and '\uDFFF' inclusive;
2247 * <code>false</code> otherwise.
2248 * @see java.lang.Character#isHighSurrogate(char)
2249 * @since 1.5
2250 */
2251 public static boolean isLowSurrogate(char ch) {
2252 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2253 }
2254
2255 /**
2256 * Determines whether the specified pair of <code>char</code>
2257 * values is a valid surrogate pair. This method is equivalent to
2258 * the expression:
2259 * <blockquote><pre>
2260 * isHighSurrogate(high) && isLowSurrogate(low)
2261 * </pre></blockquote>
2262 *
2263 * @param high the high-surrogate code value to be tested
2264 * @param low the low-surrogate code value to be tested
2265 * @return <code>true</code> if the specified high and
2266 * low-surrogate code values represent a valid surrogate pair;
2267 * <code>false</code> otherwise.
2268 * @since 1.5
2269 */
2270 public static boolean isSurrogatePair(char high, char low) {
2271 return isHighSurrogate(high) && isLowSurrogate(low);
2272 }
2273
2274 /**
2275 * Determines the number of <code>char</code> values needed to
2276 * represent the specified character (Unicode code point). If the
2277 * specified character is equal to or greater than 0x10000, then
2278 * the method returns 2. Otherwise, the method returns 1.
2279 *
2280 * <p>This method doesn't validate the specified character to be a
2281 * valid Unicode code point. The caller must validate the
2282 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2283 * if necessary.
2284 *
2285 * @param codePoint the character (Unicode code point) to be tested.
2286 * @return 2 if the character is a valid supplementary character; 1 otherwise.
2287 * @see #isSupplementaryCodePoint(int)
2288 * @since 1.5
2289 */
2290 public static int charCount(int codePoint) {
2291 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2292 }
2293
2294 /**
2295 * Converts the specified surrogate pair to its supplementary code
2296 * point value. This method does not validate the specified
2297 * surrogate pair. The caller must validate it using {@link
2298 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2299 *
2300 * @param high the high-surrogate code unit
2301 * @param low the low-surrogate code unit
2302 * @return the supplementary code point composed from the
2303 * specified surrogate pair.
2304 * @since 1.5
2305 */
2306 public static int toCodePoint(char high, char low) {
2307 return ((high - MIN_HIGH_SURROGATE) << 10)
2308 + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
2309 }
2310
2311 /**
2312 * Returns the code point at the given index of the
2313 * <code>CharSequence</code>. If the <code>char</code> value at
2314 * the given index in the <code>CharSequence</code> is in the
2315 * high-surrogate range, the following index is less than the
2316 * length of the <code>CharSequence</code>, and the
2317 * <code>char</code> value at the following index is in the
2318 * low-surrogate range, then the supplementary code point
2319 * corresponding to this surrogate pair is returned. Otherwise,
2320 * the <code>char</code> value at the given index is returned.
2321 *
2322 * @param seq a sequence of <code>char</code> values (Unicode code
2323 * units)
2324 * @param index the index to the <code>char</code> values (Unicode
2325 * code units) in <code>seq</code> to be converted
2326 * @return the Unicode code point at the given index
2327 * @exception NullPointerException if <code>seq</code> is null.
2328 * @exception IndexOutOfBoundsException if the value
2329 * <code>index</code> is negative or not less than
2330 * {@link CharSequence#length() seq.length()}.
2331 * @since 1.5
2332 */
2333 public static int codePointAt(CharSequence seq, int index) {
2334 char c1 = seq.charAt(index++);
2335 if (isHighSurrogate(c1)) {
2336 if (index < seq.length()) {
2337 char c2 = seq.charAt(index);
2338 if (isLowSurrogate(c2)) {
2339 return toCodePoint(c1, c2);
2340 }
2341 }
2342 }
2343 return c1;
2344 }
2345
2346 /**
2347 * Returns the code point at the given index of the
2348 * <code>char</code> array. If the <code>char</code> value at
2349 * the given index in the <code>char</code> array is in the
2350 * high-surrogate range, the following index is less than the
2351 * length of the <code>char</code> array, and the
2352 * <code>char</code> value at the following index is in the
2353 * low-surrogate range, then the supplementary code point
2354 * corresponding to this surrogate pair is returned. Otherwise,
2355 * the <code>char</code> value at the given index is returned.
2356 *
2357 * @param a the <code>char</code> array
2358 * @param index the index to the <code>char</code> values (Unicode
2359 * code units) in the <code>char</code> array to be converted
2360 * @return the Unicode code point at the given index
2361 * @exception NullPointerException if <code>a</code> is null.
2362 * @exception IndexOutOfBoundsException if the value
2363 * <code>index</code> is negative or not less than
2364 * the length of the <code>char</code> array.
2365 * @since 1.5
2366 */
2367 public static int codePointAt(char[] a, int index) {
2368 return codePointAtImpl(a, index, a.length);
2369 }
2370
2371 /**
2372 * Returns the code point at the given index of the
2373 * <code>char</code> array, where only array elements with
2374 * <code>index</code> less than <code>limit</code> can be used. If
2375 * the <code>char</code> value at the given index in the
2376 * <code>char</code> array is in the high-surrogate range, the
2377 * following index is less than the <code>limit</code>, and the
2378 * <code>char</code> value at the following index is in the
2379 * low-surrogate range, then the supplementary code point
2380 * corresponding to this surrogate pair is returned. Otherwise,
2381 * the <code>char</code> value at the given index is returned.
2382 *
2383 * @param a the <code>char</code> array
2384 * @param index the index to the <code>char</code> values (Unicode
2385 * code units) in the <code>char</code> array to be converted
2386 * @param limit the index after the last array element that can be used in the
2387 * <code>char</code> array
2388 * @return the Unicode code point at the given index
2389 * @exception NullPointerException if <code>a</code> is null.
2390 * @exception IndexOutOfBoundsException if the <code>index</code>
2391 * argument is negative or not less than the <code>limit</code>
2392 * argument, or if the <code>limit</code> argument is negative or
2393 * greater than the length of the <code>char</code> array.
2394 * @since 1.5
2395 */
2396 public static int codePointAt(char[] a, int index, int limit) {
2397 if (index >= limit || limit < 0 || limit > a.length) {
2398 throw new IndexOutOfBoundsException();
2399 }
2400 return codePointAtImpl(a, index, limit);
2401 }
2402
2403 static int codePointAtImpl(char[] a, int index, int limit) {
2404 char c1 = a[index++];
2405 if (isHighSurrogate(c1)) {
2406 if (index < limit) {
2407 char c2 = a[index];
2408 if (isLowSurrogate(c2)) {
2409 return toCodePoint(c1, c2);
2410 }
2411 }
2412 }
2413 return c1;
2414 }
2415
2416 /**
2417 * Returns the code point precedi