1 /*
2 * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26 package java.lang;
27 import java.util.Map;
28 import java.util.HashMap;
29 import java.util.Locale;
30
31 /**
32 * The <code>Character</code> class wraps a value of the primitive
33 * type <code>char</code> in an object. An object of type
34 * <code>Character</code> contains a single field whose type is
35 * <code>char</code>.
36 * <p>
37 * In addition, this class provides several methods for determining
38 * a character's category (lowercase letter, digit, etc.) and for converting
39 * characters from uppercase to lowercase and vice versa.
40 * <p>
41 * Character information is based on the Unicode Standard, version 4.0.
42 * <p>
43 * The methods and data of class <code>Character</code> are defined by
44 * the information in the <i>UnicodeData</i> file that is part of the
45 * Unicode Character Database maintained by the Unicode
46 * Consortium. This file specifies various properties including name
47 * and general category for every defined Unicode code point or
48 * character range.
49 * <p>
50 * The file and its description are available from the Unicode Consortium at:
51 * <ul>
52 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
53 * </ul>
54 *
55 * <h4><a name="unicode">Unicode Character Representations</a></h4>
56 *
57 * <p>The <code>char</code> data type (and therefore the value that a
 * <code>Character</code> object encapsulates) is based on the
59 * original Unicode specification, which defined characters as
60 * fixed-width 16-bit entities. The Unicode standard has since been
61 * changed to allow for characters whose representation requires more
62 * than 16 bits. The range of legal <em>code point</em>s is now
63 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
64 * (Refer to the <a
65 * href="http://www.unicode.org/reports/tr27/#notation"><i>
66 * definition</i></a> of the U+<i>n</i> notation in the Unicode
67 * standard.)
68 *
69 * <p>The set of characters from U+0000 to U+FFFF is sometimes
70 * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
71 * name="supplementary">Characters</a> whose code points are greater
72 * than U+FFFF are called <em>supplementary character</em>s. The Java
73 * 2 platform uses the UTF-16 representation in <code>char</code>
74 * arrays and in the <code>String</code> and <code>StringBuffer</code>
75 * classes. In this representation, supplementary characters are
76 * represented as a pair of <code>char</code> values, the first from
77 * the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
78 * second from the <em>low-surrogates</em> range
79 * (\uDC00-\uDFFF).
80 *
81 * <p>A <code>char</code> value, therefore, represents Basic
82 * Multilingual Plane (BMP) code points, including the surrogate
83 * code points, or code units of the UTF-16 encoding. An
84 * <code>int</code> value represents all Unicode code points,
85 * including supplementary code points. The lower (least significant)
86 * 21 bits of <code>int</code> are used to represent Unicode code
87 * points and the upper (most significant) 11 bits must be zero.
88 * Unless otherwise specified, the behavior with respect to
89 * supplementary characters and surrogate <code>char</code> values is
90 * as follows:
91 *
92 * <ul>
93 * <li>The methods that only accept a <code>char</code> value cannot support
94 * supplementary characters. They treat <code>char</code> values from the
95 * surrogate ranges as undefined characters. For example,
96 * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though
97 * this specific value if followed by any low-surrogate value in a string
98 * would represent a letter.
99 *
100 * <li>The methods that accept an <code>int</code> value support all
101 * Unicode characters, including supplementary characters. For
102 * example, <code>Character.isLetter(0x2F81A)</code> returns
103 * <code>true</code> because the code point value represents a letter
104 * (a CJK ideograph).
105 * </ul>
106 *
107 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
108 * used for character values in the range between U+0000 and U+10FFFF,
109 * and <em>Unicode code unit</em> is used for 16-bit
110 * <code>char</code> values that are code units of the <em>UTF-16</em>
111 * encoding. For more information on Unicode terminology, refer to the
112 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
113 *
114 * @author Lee Boynton
115 * @author Guy Steele
116 * @author Akira Tanaka
117 * @since 1.0
118 */
119 public final
120 class Character extends Object implements java.io.Serializable, Comparable<Character> {
121 /**
122 * The minimum radix available for conversion to and from strings.
123 * The constant value of this field is the smallest value permitted
124 * for the radix argument in radix-conversion methods such as the
125 * <code>digit</code> method, the <code>forDigit</code>
126 * method, and the <code>toString</code> method of class
127 * <code>Integer</code>.
128 *
129 * @see java.lang.Character#digit(char, int)
130 * @see java.lang.Character#forDigit(int, int)
131 * @see java.lang.Integer#toString(int, int)
132 * @see java.lang.Integer#valueOf(java.lang.String)
133 */
134 public static final int MIN_RADIX = 2;
135
136 /**
137 * The maximum radix available for conversion to and from strings.
138 * The constant value of this field is the largest value permitted
139 * for the radix argument in radix-conversion methods such as the
140 * <code>digit</code> method, the <code>forDigit</code>
141 * method, and the <code>toString</code> method of class
142 * <code>Integer</code>.
143 *
144 * @see java.lang.Character#digit(char, int)
145 * @see java.lang.Character#forDigit(int, int)
146 * @see java.lang.Integer#toString(int, int)
147 * @see java.lang.Integer#valueOf(java.lang.String)
148 */
149 public static final int MAX_RADIX = 36;
150
151 /**
152 * The constant value of this field is the smallest value of type
153 * <code>char</code>, <code>'\u0000'</code>.
154 *
155 * @since 1.0.2
156 */
157 public static final char MIN_VALUE = '\u0000';
158
159 /**
160 * The constant value of this field is the largest value of type
161 * <code>char</code>, <code>'\uFFFF'</code>.
162 *
163 * @since 1.0.2
164 */
165 public static final char MAX_VALUE = '\uffff';
166
167 /**
168 * The <code>Class</code> instance representing the primitive type
169 * <code>char</code>.
170 *
171 * @since 1.1
172 */
173 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
174
175 /*
176 * Normative general types
177 */
178
179 /*
180 * General character types
181 */
182
183 /**
184 * General category "Cn" in the Unicode specification.
185 * @since 1.1
186 */
187 public static final byte
188 UNASSIGNED = 0;
189
190 /**
191 * General category "Lu" in the Unicode specification.
192 * @since 1.1
193 */
194 public static final byte
195 UPPERCASE_LETTER = 1;
196
197 /**
198 * General category "Ll" in the Unicode specification.
199 * @since 1.1
200 */
201 public static final byte
202 LOWERCASE_LETTER = 2;
203
204 /**
205 * General category "Lt" in the Unicode specification.
206 * @since 1.1
207 */
208 public static final byte
209 TITLECASE_LETTER = 3;
210
211 /**
212 * General category "Lm" in the Unicode specification.
213 * @since 1.1
214 */
215 public static final byte
216 MODIFIER_LETTER = 4;
217
218 /**
219 * General category "Lo" in the Unicode specification.
220 * @since 1.1
221 */
222 public static final byte
223 OTHER_LETTER = 5;
224
225 /**
226 * General category "Mn" in the Unicode specification.
227 * @since 1.1
228 */
229 public static final byte
230 NON_SPACING_MARK = 6;
231
232 /**
233 * General category "Me" in the Unicode specification.
234 * @since 1.1
235 */
236 public static final byte
237 ENCLOSING_MARK = 7;
238
239 /**
240 * General category "Mc" in the Unicode specification.
241 * @since 1.1
242 */
243 public static final byte
244 COMBINING_SPACING_MARK = 8;
245
246 /**
247 * General category "Nd" in the Unicode specification.
248 * @since 1.1
249 */
250 public static final byte
251 DECIMAL_DIGIT_NUMBER = 9;
252
253 /**
254 * General category "Nl" in the Unicode specification.
255 * @since 1.1
256 */
257 public static final byte
258 LETTER_NUMBER = 10;
259
260 /**
261 * General category "No" in the Unicode specification.
262 * @since 1.1
263 */
264 public static final byte
265 OTHER_NUMBER = 11;
266
267 /**
268 * General category "Zs" in the Unicode specification.
269 * @since 1.1
270 */
271 public static final byte
272 SPACE_SEPARATOR = 12;
273
274 /**
275 * General category "Zl" in the Unicode specification.
276 * @since 1.1
277 */
278 public static final byte
279 LINE_SEPARATOR = 13;
280
281 /**
282 * General category "Zp" in the Unicode specification.
283 * @since 1.1
284 */
285 public static final byte
286 PARAGRAPH_SEPARATOR = 14;
287
288 /**
289 * General category "Cc" in the Unicode specification.
290 * @since 1.1
291 */
292 public static final byte
293 CONTROL = 15;
294
295 /**
296 * General category "Cf" in the Unicode specification.
297 * @since 1.1
298 */
299 public static final byte
300 FORMAT = 16;
301
302 /**
303 * General category "Co" in the Unicode specification.
304 * @since 1.1
305 */
306 public static final byte
307 PRIVATE_USE = 18;
308
309 /**
310 * General category "Cs" in the Unicode specification.
311 * @since 1.1
312 */
313 public static final byte
314 SURROGATE = 19;
315
316 /**
317 * General category "Pd" in the Unicode specification.
318 * @since 1.1
319 */
320 public static final byte
321 DASH_PUNCTUATION = 20;
322
323 /**
324 * General category "Ps" in the Unicode specification.
325 * @since 1.1
326 */
327 public static final byte
328 START_PUNCTUATION = 21;
329
330 /**
331 * General category "Pe" in the Unicode specification.
332 * @since 1.1
333 */
334 public static final byte
335 END_PUNCTUATION = 22;
336
337 /**
338 * General category "Pc" in the Unicode specification.
339 * @since 1.1
340 */
341 public static final byte
342 CONNECTOR_PUNCTUATION = 23;
343
344 /**
345 * General category "Po" in the Unicode specification.
346 * @since 1.1
347 */
348 public static final byte
349 OTHER_PUNCTUATION = 24;
350
351 /**
352 * General category "Sm" in the Unicode specification.
353 * @since 1.1
354 */
355 public static final byte
356 MATH_SYMBOL = 25;
357
358 /**
359 * General category "Sc" in the Unicode specification.
360 * @since 1.1
361 */
362 public static final byte
363 CURRENCY_SYMBOL = 26;
364
365 /**
366 * General category "Sk" in the Unicode specification.
367 * @since 1.1
368 */
369 public static final byte
370 MODIFIER_SYMBOL = 27;
371
372 /**
373 * General category "So" in the Unicode specification.
374 * @since 1.1
375 */
376 public static final byte
377 OTHER_SYMBOL = 28;
378
379 /**
380 * General category "Pi" in the Unicode specification.
381 * @since 1.4
382 */
383 public static final byte
384 INITIAL_QUOTE_PUNCTUATION = 29;
385
386 /**
387 * General category "Pf" in the Unicode specification.
388 * @since 1.4
389 */
390 public static final byte
391 FINAL_QUOTE_PUNCTUATION = 30;
392
393 /**
394 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
395 */
396 static final int ERROR = 0xFFFFFFFF;
397
398
399 /**
400 * Undefined bidirectional character type. Undefined <code>char</code>
401 * values have undefined directionality in the Unicode specification.
402 * @since 1.4
403 */
404 public static final byte DIRECTIONALITY_UNDEFINED = -1;
405
406 /**
407 * Strong bidirectional character type "L" in the Unicode specification.
408 * @since 1.4
409 */
410 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
411
412 /**
413 * Strong bidirectional character type "R" in the Unicode specification.
414 * @since 1.4
415 */
416 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
417
418 /**
419 * Strong bidirectional character type "AL" in the Unicode specification.
420 * @since 1.4
421 */
422 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
423
424 /**
425 * Weak bidirectional character type "EN" in the Unicode specification.
426 * @since 1.4
427 */
428 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
429
430 /**
431 * Weak bidirectional character type "ES" in the Unicode specification.
432 * @since 1.4
433 */
434 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
435
436 /**
437 * Weak bidirectional character type "ET" in the Unicode specification.
438 * @since 1.4
439 */
440 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
441
442 /**
443 * Weak bidirectional character type "AN" in the Unicode specification.
444 * @since 1.4
445 */
446 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
447
448 /**
449 * Weak bidirectional character type "CS" in the Unicode specification.
450 * @since 1.4
451 */
452 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
453
454 /**
455 * Weak bidirectional character type "NSM" in the Unicode specification.
456 * @since 1.4
457 */
458 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
459
460 /**
461 * Weak bidirectional character type "BN" in the Unicode specification.
462 * @since 1.4
463 */
464 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
465
466 /**
467 * Neutral bidirectional character type "B" in the Unicode specification.
468 * @since 1.4
469 */
470 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
471
472 /**
473 * Neutral bidirectional character type "S" in the Unicode specification.
474 * @since 1.4
475 */
476 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
477
478 /**
479 * Neutral bidirectional character type "WS" in the Unicode specification.
480 * @since 1.4
481 */
482 public static final byte DIRECTIONALITY_WHITESPACE = 12;
483
484 /**
485 * Neutral bidirectional character type "ON" in the Unicode specification.
486 * @since 1.4
487 */
488 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
489
490 /**
491 * Strong bidirectional character type "LRE" in the Unicode specification.
492 * @since 1.4
493 */
494 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
495
496 /**
497 * Strong bidirectional character type "LRO" in the Unicode specification.
498 * @since 1.4
499 */
500 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
501
502 /**
503 * Strong bidirectional character type "RLE" in the Unicode specification.
504 * @since 1.4
505 */
506 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
507
508 /**
509 * Strong bidirectional character type "RLO" in the Unicode specification.
510 * @since 1.4
511 */
512 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
513
514 /**
515 * Weak bidirectional character type "PDF" in the Unicode specification.
516 * @since 1.4
517 */
518 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
519
520 /**
521 * The minimum value of a Unicode high-surrogate code unit in the
522 * UTF-16 encoding. A high-surrogate is also known as a
523 * <i>leading-surrogate</i>.
524 *
525 * @since 1.5
526 */
527 public static final char MIN_HIGH_SURROGATE = '\uD800';
528
529 /**
530 * The maximum value of a Unicode high-surrogate code unit in the
531 * UTF-16 encoding. A high-surrogate is also known as a
532 * <i>leading-surrogate</i>.
533 *
534 * @since 1.5
535 */
536 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
537
538 /**
539 * The minimum value of a Unicode low-surrogate code unit in the
540 * UTF-16 encoding. A low-surrogate is also known as a
541 * <i>trailing-surrogate</i>.
542 *
543 * @since 1.5
544 */
545 public static final char MIN_LOW_SURROGATE = '\uDC00';
546
547 /**
548 * The maximum value of a Unicode low-surrogate code unit in the
549 * UTF-16 encoding. A low-surrogate is also known as a
550 * <i>trailing-surrogate</i>.
551 *
552 * @since 1.5
553 */
554 public static final char MAX_LOW_SURROGATE = '\uDFFF';
555
556 /**
557 * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
558 *
559 * @since 1.5
560 */
561 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
562
563 /**
564 * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
565 *
566 * @since 1.5
567 */
568 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
569
570 /**
571 * The minimum value of a supplementary code point.
572 *
573 * @since 1.5
574 */
575 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
576
577 /**
578 * The minimum value of a Unicode code point.
579 *
580 * @since 1.5
581 */
582 public static final int MIN_CODE_POINT = 0x000000;
583
584 /**
585 * The maximum value of a Unicode code point.
586 *
587 * @since 1.5
588 */
589 public static final int MAX_CODE_POINT = 0x10ffff;
590
591
592 /**
593 * Instances of this class represent particular subsets of the Unicode
594 * character set. The only family of subsets defined in the
595 * <code>Character</code> class is <code>{@link Character.UnicodeBlock
596 * UnicodeBlock}</code>. Other portions of the Java API may define other
597 * subsets for their own purposes.
598 *
599 * @since 1.2
600 */
601 public static class Subset {
602
603 private String name;
604
605 /**
606 * Constructs a new <code>Subset</code> instance.
607 *
608 * @exception NullPointerException if name is <code>null</code>
609 * @param name The name of this subset
610 */
611 protected Subset(String name) {
612 if (name == null) {
613 throw new NullPointerException("name");
614 }
615 this.name = name;
616 }
617
618 /**
619 * Compares two <code>Subset</code> objects for equality.
620 * This method returns <code>true</code> if and only if
621 * <code>this</code> and the argument refer to the same
622 * object; since this method is <code>final</code>, this
623 * guarantee holds for all subclasses.
624 */
625 public final boolean equals(Object obj) {
626 return (this == obj);
627 }
628
629 /**
630 * Returns the standard hash code as defined by the
631 * <code>{@link Object#hashCode}</code> method. This method
632 * is <code>final</code> in order to ensure that the
633 * <code>equals</code> and <code>hashCode</code> methods will
634 * be consistent in all subclasses.
635 */
636 public final int hashCode() {
637 return super.hashCode();
638 }
639
640 /**
641 * Returns the name of this subset.
642 */
643 public final String toString() {
644 return name;
645 }
646 }
647
648 /**
649 * A family of character subsets representing the character blocks in the
650 * Unicode specification. Character blocks generally define characters
651 * used for a specific script or purpose. A character is contained by
652 * at most one Unicode block.
653 *
654 * @since 1.2
655 */
656 public static final class UnicodeBlock extends Subset {
657
658 private static Map map = new HashMap();
659
660 /**
661 * Create a UnicodeBlock with the given identifier name.
662 * This name must be the same as the block identifier.
663 */
664 private UnicodeBlock(String idName) {
665 super(idName);
666 map.put(idName.toUpperCase(Locale.US), this);
667 }
668
669 /**
670 * Create a UnicodeBlock with the given identifier name and
671 * alias name.
672 */
673 private UnicodeBlock(String idName, String alias) {
674 this(idName);
675 map.put(alias.toUpperCase(Locale.US), this);
676 }
677
/**
 * Creates a block with the given identifier name and additionally
 * registers it under every alias in <code>aliasName</code>, each
 * upper-cased with the US locale. A <code>null</code> alias array
 * registers the identifier name only.
 */
private UnicodeBlock(String idName, String[] aliasName) {
    this(idName);
    if (aliasName == null) {
        return;
    }
    for (String alias : aliasName) {
        map.put(alias.toUpperCase(Locale.US), this);
    }
}
690
691 /**
692 * Constant for the "Basic Latin" Unicode character block.
693 * @since 1.2
694 */
695 public static final UnicodeBlock BASIC_LATIN =
696 new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
697
698 /**
699 * Constant for the "Latin-1 Supplement" Unicode character block.
700 * @since 1.2
701 */
702 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
703 new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
704
705 /**
706 * Constant for the "Latin Extended-A" Unicode character block.
707 * @since 1.2
708 */
709 public static final UnicodeBlock LATIN_EXTENDED_A =
710 new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
711
712 /**
713 * Constant for the "Latin Extended-B" Unicode character block.
714 * @since 1.2
715 */
716 public static final UnicodeBlock LATIN_EXTENDED_B =
717 new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
718
719 /**
720 * Constant for the "IPA Extensions" Unicode character block.
721 * @since 1.2
722 */
723 public static final UnicodeBlock IPA_EXTENSIONS =
724 new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
725
726 /**
727 * Constant for the "Spacing Modifier Letters" Unicode character block.
728 * @since 1.2
729 */
730 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
731 new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
732 "SpacingModifierLetters"});
733
734 /**
735 * Constant for the "Combining Diacritical Marks" Unicode character block.
736 * @since 1.2
737 */
738 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
739 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
740 "CombiningDiacriticalMarks" });
741
742 /**
743 * Constant for the "Greek and Coptic" Unicode character block.
744 * <p>
745 * This block was previously known as the "Greek" block.
746 *
747 * @since 1.2
748 */
749 public static final UnicodeBlock GREEK
750 = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
751
752 /**
753 * Constant for the "Cyrillic" Unicode character block.
754 * @since 1.2
755 */
756 public static final UnicodeBlock CYRILLIC =
757 new UnicodeBlock("CYRILLIC");
758
759 /**
760 * Constant for the "Armenian" Unicode character block.
761 * @since 1.2
762 */
763 public static final UnicodeBlock ARMENIAN =
764 new UnicodeBlock("ARMENIAN");
765
766 /**
767 * Constant for the "Hebrew" Unicode character block.
768 * @since 1.2
769 */
770 public static final UnicodeBlock HEBREW =
771 new UnicodeBlock("HEBREW");
772
773 /**
774 * Constant for the "Arabic" Unicode character block.
775 * @since 1.2
776 */
777 public static final UnicodeBlock ARABIC =
778 new UnicodeBlock("ARABIC");
779
780 /**
781 * Constant for the "Devanagari" Unicode character block.
782 * @since 1.2
783 */
784 public static final UnicodeBlock DEVANAGARI =
785 new UnicodeBlock("DEVANAGARI");
786
787 /**
788 * Constant for the "Bengali" Unicode character block.
789 * @since 1.2
790 */
791 public static final UnicodeBlock BENGALI =
792 new UnicodeBlock("BENGALI");
793
794 /**
795 * Constant for the "Gurmukhi" Unicode character block.
796 * @since 1.2
797 */
798 public static final UnicodeBlock GURMUKHI =
799 new UnicodeBlock("GURMUKHI");
800
801 /**
802 * Constant for the "Gujarati" Unicode character block.
803 * @since 1.2
804 */
805 public static final UnicodeBlock GUJARATI =
806 new UnicodeBlock("GUJARATI");
807
808 /**
809 * Constant for the "Oriya" Unicode character block.
810 * @since 1.2
811 */
812 public static final UnicodeBlock ORIYA =
813 new UnicodeBlock("ORIYA");
814
815 /**
816 * Constant for the "Tamil" Unicode character block.
817 * @since 1.2
818 */
819 public static final UnicodeBlock TAMIL =
820 new UnicodeBlock("TAMIL");
821
822 /**
823 * Constant for the "Telugu" Unicode character block.
824 * @since 1.2
825 */
826 public static final UnicodeBlock TELUGU =
827 new UnicodeBlock("TELUGU");
828
829 /**
830 * Constant for the "Kannada" Unicode character block.
831 * @since 1.2
832 */
833 public static final UnicodeBlock KANNADA =
834 new UnicodeBlock("KANNADA");
835
836 /**
837 * Constant for the "Malayalam" Unicode character block.
838 * @since 1.2
839 */
840 public static final UnicodeBlock MALAYALAM =
841 new UnicodeBlock("MALAYALAM");
842
843 /**
844 * Constant for the "Thai" Unicode character block.
845 * @since 1.2
846 */
847 public static final UnicodeBlock THAI =
848 new UnicodeBlock("THAI");
849
850 /**
851 * Constant for the "Lao" Unicode character block.
852 * @since 1.2
853 */
854 public static final UnicodeBlock LAO =
855 new UnicodeBlock("LAO");
856
857 /**
858 * Constant for the "Tibetan" Unicode character block.
859 * @since 1.2
860 */
861 public static final UnicodeBlock TIBETAN =
862 new UnicodeBlock("TIBETAN");
863
864 /**
865 * Constant for the "Georgian" Unicode character block.
866 * @since 1.2
867 */
868 public static final UnicodeBlock GEORGIAN =
869 new UnicodeBlock("GEORGIAN");
870
871 /**
872 * Constant for the "Hangul Jamo" Unicode character block.
873 * @since 1.2
874 */
875 public static final UnicodeBlock HANGUL_JAMO =
876 new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
877
878 /**
879 * Constant for the "Latin Extended Additional" Unicode character block.
880 * @since 1.2
881 */
882 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
883 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
884 "LatinExtendedAdditional"});
885
886 /**
887 * Constant for the "Greek Extended" Unicode character block.
888 * @since 1.2
889 */
890 public static final UnicodeBlock GREEK_EXTENDED =
891 new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
892
893 /**
894 * Constant for the "General Punctuation" Unicode character block.
895 * @since 1.2
896 */
897 public static final UnicodeBlock GENERAL_PUNCTUATION =
898 new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
899
900 /**
901 * Constant for the "Superscripts and Subscripts" Unicode character block.
902 * @since 1.2
903 */
904 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
905 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
906 "SuperscriptsandSubscripts" });
907
908 /**
909 * Constant for the "Currency Symbols" Unicode character block.
910 * @since 1.2
911 */
912 public static final UnicodeBlock CURRENCY_SYMBOLS =
913 new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
914
915 /**
916 * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
917 * <p>
918 * This block was previously known as "Combining Marks for Symbols".
919 * @since 1.2
920 */
921 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
922 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
923 "CombiningDiacriticalMarksforSymbols",
924 "Combining Marks for Symbols",
925 "CombiningMarksforSymbols" });
926
927 /**
928 * Constant for the "Letterlike Symbols" Unicode character block.
929 * @since 1.2
930 */
931 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
932 new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
933
934 /**
935 * Constant for the "Number Forms" Unicode character block.
936 * @since 1.2
937 */
938 public static final UnicodeBlock NUMBER_FORMS =
939 new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
940
941 /**
942 * Constant for the "Arrows" Unicode character block.
943 * @since 1.2
944 */
945 public static final UnicodeBlock ARROWS =
946 new UnicodeBlock("ARROWS");
947
948 /**
949 * Constant for the "Mathematical Operators" Unicode character block.
950 * @since 1.2
951 */
952 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
953 new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
954 "MathematicalOperators"});
955
956 /**
957 * Constant for the "Miscellaneous Technical" Unicode character block.
958 * @since 1.2
959 */
960 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
961 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
962 "MiscellaneousTechnical"});
963
964 /**
965 * Constant for the "Control Pictures" Unicode character block.
966 * @since 1.2
967 */
968 public static final UnicodeBlock CONTROL_PICTURES =
969 new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
970
971 /**
972 * Constant for the "Optical Character Recognition" Unicode character block.
973 * @since 1.2
974 */
975 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
976 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
977 "OpticalCharacterRecognition"});
978
979 /**
980 * Constant for the "Enclosed Alphanumerics" Unicode character block.
981 * @since 1.2
982 */
983 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
984 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
985 "EnclosedAlphanumerics"});
986
987 /**
988 * Constant for the "Box Drawing" Unicode character block.
989 * @since 1.2
990 */
991 public static final UnicodeBlock BOX_DRAWING =
992 new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
993
994 /**
995 * Constant for the "Block Elements" Unicode character block.
996 * @since 1.2
997 */
998 public static final UnicodeBlock BLOCK_ELEMENTS =
999 new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
1000
1001 /**
1002 * Constant for the "Geometric Shapes" Unicode character block.
1003 * @since 1.2
1004 */
1005 public static final UnicodeBlock GEOMETRIC_SHAPES =
1006 new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
1007
1008 /**
1009 * Constant for the "Miscellaneous Symbols" Unicode character block.
1010 * @since 1.2
1011 */
1012 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1013 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
1014 "MiscellaneousSymbols"});
1015
1016 /**
1017 * Constant for the "Dingbats" Unicode character block.
1018 * @since 1.2
1019 */
1020 public static final UnicodeBlock DINGBATS =
1021 new UnicodeBlock("DINGBATS");
1022
/**
 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
 * Covers code points U+3000 through U+303F.
 * @since 1.2
 */
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
    new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
                                                                   "CJKSymbolsandPunctuation"});

/**
 * Constant for the "Hiragana" Unicode character block.
 * Covers code points U+3040 through U+309F.
 * @since 1.2
 */
public static final UnicodeBlock HIRAGANA =
    new UnicodeBlock("HIRAGANA");

/**
 * Constant for the "Katakana" Unicode character block.
 * Covers code points U+30A0 through U+30FF.
 * @since 1.2
 */
public static final UnicodeBlock KATAKANA =
    new UnicodeBlock("KATAKANA");

/**
 * Constant for the "Bopomofo" Unicode character block.
 * Covers code points U+3100 through U+312F.
 * @since 1.2
 */
public static final UnicodeBlock BOPOMOFO =
    new UnicodeBlock("BOPOMOFO");

/**
 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
 * Covers code points U+3130 through U+318F.
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
    new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
                                                                 "HangulCompatibilityJamo"});

/**
 * Constant for the "Kanbun" Unicode character block.
 * Covers code points U+3190 through U+319F.
 * @since 1.2
 */
public static final UnicodeBlock KANBUN =
    new UnicodeBlock("KANBUN");

/**
 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
 * Covers code points U+3200 through U+32FF.
 * @since 1.2
 */
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
    new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
                                                                       "EnclosedCJKLettersandMonths"});

/**
 * Constant for the "CJK Compatibility" Unicode character block.
 * Covers code points U+3300 through U+33FF.
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY =
    new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});

/**
 * Constant for the "CJK Unified Ideographs" Unicode character block.
 * Covers code points U+4E00 through U+9FFF.
 * @since 1.2
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
                                                              "CJKUnifiedIdeographs"});

/**
 * Constant for the "Hangul Syllables" Unicode character block.
 * Covers code points U+AC00 through U+D7AF.
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_SYLLABLES =
    new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
1096
/**
 * Constant for the "Private Use Area" Unicode character block.
 * Covers code points U+E000 through U+F8FF.
 * @since 1.2
 */
public static final UnicodeBlock PRIVATE_USE_AREA =
    new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});

/**
 * Constant for the "CJK Compatibility Ideographs" Unicode character block.
 * Covers code points U+F900 through U+FAFF.
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
    new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
                     new String[] {"CJK Compatibility Ideographs",
                                   "CJKCompatibilityIdeographs"});

/**
 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
 * Covers code points U+FB00 through U+FB4F.
 * @since 1.2
 */
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
    new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
                                                                     "AlphabeticPresentationForms"});

/**
 * Constant for the "Arabic Presentation Forms-A" Unicode character block.
 * Covers code points U+FB50 through U+FDFF.
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
    new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
                                                                   "ArabicPresentationForms-A"});

/**
 * Constant for the "Combining Half Marks" Unicode character block.
 * Covers code points U+FE20 through U+FE2F.
 * @since 1.2
 */
public static final UnicodeBlock COMBINING_HALF_MARKS =
    new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
                                                            "CombiningHalfMarks"});

/**
 * Constant for the "CJK Compatibility Forms" Unicode character block.
 * Covers code points U+FE30 through U+FE4F.
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
    new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
                                                               "CJKCompatibilityForms"});

/**
 * Constant for the "Small Form Variants" Unicode character block.
 * Covers code points U+FE50 through U+FE6F.
 * @since 1.2
 */
public static final UnicodeBlock SMALL_FORM_VARIANTS =
    new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
                                                           "SmallFormVariants"});

/**
 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
 * Covers code points U+FE70 through U+FEFF.
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
    new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
                                                                   "ArabicPresentationForms-B"});

/**
 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
 * Covers code points U+FF00 through U+FFEF.
 * @since 1.2
 */
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
    new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
                     new String[] {"Halfwidth and Fullwidth Forms",
                                   "HalfwidthandFullwidthForms"});

/**
 * Constant for the "Specials" Unicode character block.
 * Covers code points U+FFF0 through U+FFFF.
 * @since 1.2
 */
public static final UnicodeBlock SPECIALS =
    new UnicodeBlock("SPECIALS");
1176
/**
 * Legacy single-block constant, superseded by the three surrogate
 * block constants named in the deprecation note.
 *
 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
 *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
 *             {@link #LOW_SURROGATES}. These new constants match
 *             the block definitions of the Unicode Standard.
 *             The {@link #of(char)} and {@link #of(int)} methods
 *             return the new constants, not SURROGATES_AREA.
 */
@Deprecated
public static final UnicodeBlock SURROGATES_AREA =
    new UnicodeBlock("SURROGATES_AREA");
1188
/**
 * Constant for the "Syriac" Unicode character block.
 * Covers code points U+0700 through U+074F.
 * @since 1.4
 */
public static final UnicodeBlock SYRIAC =
    new UnicodeBlock("SYRIAC");

/**
 * Constant for the "Thaana" Unicode character block.
 * Covers code points U+0780 through U+07BF.
 * @since 1.4
 */
public static final UnicodeBlock THAANA =
    new UnicodeBlock("THAANA");

/**
 * Constant for the "Sinhala" Unicode character block.
 * Covers code points U+0D80 through U+0DFF.
 * @since 1.4
 */
public static final UnicodeBlock SINHALA =
    new UnicodeBlock("SINHALA");

/**
 * Constant for the "Myanmar" Unicode character block.
 * Covers code points U+1000 through U+109F.
 * @since 1.4
 */
public static final UnicodeBlock MYANMAR =
    new UnicodeBlock("MYANMAR");

/**
 * Constant for the "Ethiopic" Unicode character block.
 * Covers code points U+1200 through U+137F.
 * @since 1.4
 */
public static final UnicodeBlock ETHIOPIC =
    new UnicodeBlock("ETHIOPIC");

/**
 * Constant for the "Cherokee" Unicode character block.
 * Covers code points U+13A0 through U+13FF.
 * @since 1.4
 */
public static final UnicodeBlock CHEROKEE =
    new UnicodeBlock("CHEROKEE");

/**
 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
 * Covers code points U+1400 through U+167F.
 * @since 1.4
 */
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
    new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
                     new String[] {"Unified Canadian Aboriginal Syllabics",
                                   "UnifiedCanadianAboriginalSyllabics"});

/**
 * Constant for the "Ogham" Unicode character block.
 * Covers code points U+1680 through U+169F.
 * @since 1.4
 */
public static final UnicodeBlock OGHAM =
    new UnicodeBlock("OGHAM");

/**
 * Constant for the "Runic" Unicode character block.
 * Covers code points U+16A0 through U+16FF.
 * @since 1.4
 */
public static final UnicodeBlock RUNIC =
    new UnicodeBlock("RUNIC");

/**
 * Constant for the "Khmer" Unicode character block.
 * Covers code points U+1780 through U+17FF.
 * @since 1.4
 */
public static final UnicodeBlock KHMER =
    new UnicodeBlock("KHMER");

/**
 * Constant for the "Mongolian" Unicode character block.
 * Covers code points U+1800 through U+18AF.
 * @since 1.4
 */
public static final UnicodeBlock MONGOLIAN =
    new UnicodeBlock("MONGOLIAN");
1267
/**
 * Constant for the "Braille Patterns" Unicode character block.
 * Covers code points U+2800 through U+28FF.
 * @since 1.4
 */
public static final UnicodeBlock BRAILLE_PATTERNS =
    new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
                                                        "BraillePatterns"});

/**
 * Constant for the "CJK Radicals Supplement" Unicode character block.
 * Covers code points U+2E80 through U+2EFF.
 * @since 1.4
 */
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
    new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
                                                               "CJKRadicalsSupplement"});

/**
 * Constant for the "Kangxi Radicals" Unicode character block.
 * Covers code points U+2F00 through U+2FDF.
 * @since 1.4
 */
public static final UnicodeBlock KANGXI_RADICALS =
    new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});

/**
 * Constant for the "Ideographic Description Characters" Unicode character block.
 * Covers code points U+2FF0 through U+2FFF.
 * @since 1.4
 */
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
    new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
                                                                          "IdeographicDescriptionCharacters"});

/**
 * Constant for the "Bopomofo Extended" Unicode character block.
 * Covers code points U+31A0 through U+31BF.
 * @since 1.4
 */
public static final UnicodeBlock BOPOMOFO_EXTENDED =
    new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
                                                         "BopomofoExtended"});

/**
 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
 * Covers code points U+3400 through U+4DBF.
 * @since 1.4
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
                                                                          "CJKUnifiedIdeographsExtensionA"});

/**
 * Constant for the "Yi Syllables" Unicode character block.
 * Covers code points U+A000 through U+A48F.
 * @since 1.4
 */
public static final UnicodeBlock YI_SYLLABLES =
    new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});

/**
 * Constant for the "Yi Radicals" Unicode character block.
 * Covers code points U+A490 through U+A4CF.
 * @since 1.4
 */
public static final UnicodeBlock YI_RADICALS =
    new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
1328
1329
/**
 * Constant for the "Cyrillic Supplementary" Unicode character block.
 * Covers code points U+0500 through U+052F.
 * @since 1.5
 */
public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
    new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", new String[] {"Cyrillic Supplementary",
                                                              "CyrillicSupplementary"});

/**
 * Constant for the "Tagalog" Unicode character block.
 * Covers code points U+1700 through U+171F.
 * @since 1.5
 */
public static final UnicodeBlock TAGALOG =
    new UnicodeBlock("TAGALOG");

/**
 * Constant for the "Hanunoo" Unicode character block.
 * Covers code points U+1720 through U+173F.
 * @since 1.5
 */
public static final UnicodeBlock HANUNOO =
    new UnicodeBlock("HANUNOO");

/**
 * Constant for the "Buhid" Unicode character block.
 * Covers code points U+1740 through U+175F.
 * @since 1.5
 */
public static final UnicodeBlock BUHID =
    new UnicodeBlock("BUHID");

/**
 * Constant for the "Tagbanwa" Unicode character block.
 * Covers code points U+1760 through U+177F.
 * @since 1.5
 */
public static final UnicodeBlock TAGBANWA =
    new UnicodeBlock("TAGBANWA");

/**
 * Constant for the "Limbu" Unicode character block.
 * Covers code points U+1900 through U+194F.
 * @since 1.5
 */
public static final UnicodeBlock LIMBU =
    new UnicodeBlock("LIMBU");

/**
 * Constant for the "Tai Le" Unicode character block.
 * Covers code points U+1950 through U+197F.
 * @since 1.5
 */
public static final UnicodeBlock TAI_LE =
    new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});

/**
 * Constant for the "Khmer Symbols" Unicode character block.
 * Covers code points U+19E0 through U+19FF.
 * @since 1.5
 */
public static final UnicodeBlock KHMER_SYMBOLS =
    new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});

/**
 * Constant for the "Phonetic Extensions" Unicode character block.
 * Covers code points U+1D00 through U+1D7F.
 * @since 1.5
 */
public static final UnicodeBlock PHONETIC_EXTENSIONS =
    new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
1393
/**
 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
 * Covers code points U+27C0 through U+27EF.
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
    new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
                     new String[]{"Miscellaneous Mathematical Symbols-A",
                                  "MiscellaneousMathematicalSymbols-A"});

/**
 * Constant for the "Supplemental Arrows-A" Unicode character block.
 * Covers code points U+27F0 through U+27FF.
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
    new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
                                                             "SupplementalArrows-A"});

/**
 * Constant for the "Supplemental Arrows-B" Unicode character block.
 * Covers code points U+2900 through U+297F.
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
    new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
                                                             "SupplementalArrows-B"});

/**
 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
 * Covers code points U+2980 through U+29FF.
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
    = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
                       new String[] {"Miscellaneous Mathematical Symbols-B",
                                     "MiscellaneousMathematicalSymbols-B"});

/**
 * Constant for the "Supplemental Mathematical Operators" Unicode character block.
 * Covers code points U+2A00 through U+2AFF.
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
    new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
                     new String[]{"Supplemental Mathematical Operators",
                                  "SupplementalMathematicalOperators"} );

/**
 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
 * Covers code points U+2B00 through U+2BFF.
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
    new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
                                                                        "MiscellaneousSymbolsandArrows"});

/**
 * Constant for the "Katakana Phonetic Extensions" Unicode character block.
 * Covers code points U+31F0 through U+31FF.
 * @since 1.5
 */
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
    new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
                                                                    "KatakanaPhoneticExtensions"});

/**
 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
 * Covers code points U+4DC0 through U+4DFF.
 * @since 1.5
 */
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
    new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
                                                               "YijingHexagramSymbols"});

/**
 * Constant for the "Variation Selectors" Unicode character block.
 * Covers code points U+FE00 through U+FE0F.
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS =
    new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
1467
/**
 * Constant for the "Linear B Syllabary" Unicode character block.
 * Covers code points U+10000 through U+1007F.
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_SYLLABARY =
    new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});

/**
 * Constant for the "Linear B Ideograms" Unicode character block.
 * Covers code points U+10080 through U+100FF.
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
    new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});

/**
 * Constant for the "Aegean Numbers" Unicode character block.
 * Covers code points U+10100 through U+1013F.
 * @since 1.5
 */
public static final UnicodeBlock AEGEAN_NUMBERS =
    new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});

/**
 * Constant for the "Old Italic" Unicode character block.
 * Covers code points U+10300 through U+1032F.
 * @since 1.5
 */
public static final UnicodeBlock OLD_ITALIC =
    new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});

/**
 * Constant for the "Gothic" Unicode character block.
 * Covers code points U+10330 through U+1034F.
 * @since 1.5
 */
public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");

/**
 * Constant for the "Ugaritic" Unicode character block.
 * Covers code points U+10380 through U+1039F.
 * @since 1.5
 */
public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");

/**
 * Constant for the "Deseret" Unicode character block.
 * Covers code points U+10400 through U+1044F.
 * @since 1.5
 */
public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");

/**
 * Constant for the "Shavian" Unicode character block.
 * Covers code points U+10450 through U+1047F.
 * @since 1.5
 */
public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");

/**
 * Constant for the "Osmanya" Unicode character block.
 * Covers code points U+10480 through U+104AF.
 * @since 1.5
 */
public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");

/**
 * Constant for the "Cypriot Syllabary" Unicode character block.
 * Covers code points U+10800 through U+1083F.
 * @since 1.5
 */
public static final UnicodeBlock CYPRIOT_SYLLABARY =
    new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
1532
/**
 * Constant for the "Byzantine Musical Symbols" Unicode character block.
 * Covers code points U+1D000 through U+1D0FF.
 * @since 1.5
 */
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
    new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
                                                                 "ByzantineMusicalSymbols"});

/**
 * Constant for the "Musical Symbols" Unicode character block.
 * Covers code points U+1D100 through U+1D1FF.
 * @since 1.5
 */
public static final UnicodeBlock MUSICAL_SYMBOLS =
    new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});

/**
 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
 * Covers code points U+1D300 through U+1D35F.
 * @since 1.5
 */
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
    new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
                                                             "TaiXuanJingSymbols"});

/**
 * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block.
 * Covers code points U+1D400 through U+1D7FF.
 * @since 1.5
 */
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
    new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
                     new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});

/**
 * Constant for the "CJK Unified Ideographs Extension B" Unicode character block.
 * Covers code points U+20000 through U+2A6DF.
 * @since 1.5
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
                     new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});

/**
 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
 * Covers code points U+2F800 through U+2FA1F.
 * @since 1.5
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
    new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
                     new String[]{"CJK Compatibility Ideographs Supplement",
                                  "CJKCompatibilityIdeographsSupplement"});

/**
 * Constant for the "Tags" Unicode character block.
 * Covers code points U+E0000 through U+E007F.
 * @since 1.5
 */
public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");

/**
 * Constant for the "Variation Selectors Supplement" Unicode character block.
 * Covers code points U+E0100 through U+E01EF.
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
    new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
                                                                      "VariationSelectorsSupplement"});

/**
 * Constant for the "Supplementary Private Use Area-A" Unicode character block.
 * Covers code points U+F0000 through U+FFFFF.
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
    new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
                     new String[] {"Supplementary Private Use Area-A",
                                   "SupplementaryPrivateUseArea-A"});

/**
 * Constant for the "Supplementary Private Use Area-B" Unicode character block.
 * Covers code points U+100000 through U+10FFFF.
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
    new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
                     new String[] {"Supplementary Private Use Area-B",
                                   "SupplementaryPrivateUseArea-B"});
1612
/**
 * Constant for the "High Surrogates" Unicode character block.
 * This block represents code point values in the high surrogate
 * range: 0xD800 through 0xDB7F.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_SURROGATES =
    new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});

/**
 * Constant for the "High Private Use Surrogates" Unicode character block.
 * This block represents code point values in the high surrogate
 * range: 0xDB80 through 0xDBFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
    new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
                                                                   "HighPrivateUseSurrogates"});
1633
/**
 * Constant for the "Low Surrogates" Unicode character block.
 * This block represents code point values in the low surrogate
 * range: 0xDC00 through 0xDFFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock LOW_SURROGATES =
    new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
1643
/**
 * First code point of every range known to {@link #of(int)}, listed in
 * ascending order so that the table can be binary-searched. Entry
 * <code>i</code> starts the range whose block is <code>blocks[i]</code>;
 * each range ends just before the next entry. Ranges that belong to no
 * block are listed as well (commented "unassigned") so that gaps between
 * blocks have their own entry.
 */
private static final int blockStarts[] = {
    0x0000, // Basic Latin
    0x0080, // Latin-1 Supplement
    0x0100, // Latin Extended-A
    0x0180, // Latin Extended-B
    0x0250, // IPA Extensions
    0x02B0, // Spacing Modifier Letters
    0x0300, // Combining Diacritical Marks
    0x0370, // Greek and Coptic
    0x0400, // Cyrillic
    0x0500, // Cyrillic Supplementary
    0x0530, // Armenian
    0x0590, // Hebrew
    0x0600, // Arabic
    0x0700, // Syriac
    0x0750, // unassigned
    0x0780, // Thaana
    0x07C0, // unassigned
    0x0900, // Devanagari
    0x0980, // Bengali
    0x0A00, // Gurmukhi
    0x0A80, // Gujarati
    0x0B00, // Oriya
    0x0B80, // Tamil
    0x0C00, // Telugu
    0x0C80, // Kannada
    0x0D00, // Malayalam
    0x0D80, // Sinhala
    0x0E00, // Thai
    0x0E80, // Lao
    0x0F00, // Tibetan
    0x1000, // Myanmar
    0x10A0, // Georgian
    0x1100, // Hangul Jamo
    0x1200, // Ethiopic
    0x1380, // unassigned
    0x13A0, // Cherokee
    0x1400, // Unified Canadian Aboriginal Syllabics
    0x1680, // Ogham
    0x16A0, // Runic
    0x1700, // Tagalog
    0x1720, // Hanunoo
    0x1740, // Buhid
    0x1760, // Tagbanwa
    0x1780, // Khmer
    0x1800, // Mongolian
    0x18B0, // unassigned
    0x1900, // Limbu
    0x1950, // Tai Le
    0x1980, // unassigned
    0x19E0, // Khmer Symbols
    0x1A00, // unassigned
    0x1D00, // Phonetic Extensions
    0x1D80, // unassigned
    0x1E00, // Latin Extended Additional
    0x1F00, // Greek Extended
    0x2000, // General Punctuation
    0x2070, // Superscripts and Subscripts
    0x20A0, // Currency Symbols
    0x20D0, // Combining Diacritical Marks for Symbols
    0x2100, // Letterlike Symbols
    0x2150, // Number Forms
    0x2190, // Arrows
    0x2200, // Mathematical Operators
    0x2300, // Miscellaneous Technical
    0x2400, // Control Pictures
    0x2440, // Optical Character Recognition
    0x2460, // Enclosed Alphanumerics
    0x2500, // Box Drawing
    0x2580, // Block Elements
    0x25A0, // Geometric Shapes
    0x2600, // Miscellaneous Symbols
    0x2700, // Dingbats
    0x27C0, // Miscellaneous Mathematical Symbols-A
    0x27F0, // Supplemental Arrows-A
    0x2800, // Braille Patterns
    0x2900, // Supplemental Arrows-B
    0x2980, // Miscellaneous Mathematical Symbols-B
    0x2A00, // Supplemental Mathematical Operators
    0x2B00, // Miscellaneous Symbols and Arrows
    0x2C00, // unassigned
    0x2E80, // CJK Radicals Supplement
    0x2F00, // Kangxi Radicals
    0x2FE0, // unassigned
    0x2FF0, // Ideographic Description Characters
    0x3000, // CJK Symbols and Punctuation
    0x3040, // Hiragana
    0x30A0, // Katakana
    0x3100, // Bopomofo
    0x3130, // Hangul Compatibility Jamo
    0x3190, // Kanbun
    0x31A0, // Bopomofo Extended
    0x31C0, // unassigned
    0x31F0, // Katakana Phonetic Extensions
    0x3200, // Enclosed CJK Letters and Months
    0x3300, // CJK Compatibility
    0x3400, // CJK Unified Ideographs Extension A
    0x4DC0, // Yijing Hexagram Symbols
    0x4E00, // CJK Unified Ideographs
    0xA000, // Yi Syllables
    0xA490, // Yi Radicals
    0xA4D0, // unassigned
    0xAC00, // Hangul Syllables
    0xD7B0, // unassigned
    0xD800, // High Surrogates
    0xDB80, // High Private Use Surrogates
    0xDC00, // Low Surrogates
    0xE000, // Private Use
    0xF900, // CJK Compatibility Ideographs
    0xFB00, // Alphabetic Presentation Forms
    0xFB50, // Arabic Presentation Forms-A
    0xFE00, // Variation Selectors
    0xFE10, // unassigned
    0xFE20, // Combining Half Marks
    0xFE30, // CJK Compatibility Forms
    0xFE50, // Small Form Variants
    0xFE70, // Arabic Presentation Forms-B
    0xFF00, // Halfwidth and Fullwidth Forms
    0xFFF0, // Specials
    0x10000, // Linear B Syllabary
    0x10080, // Linear B Ideograms
    0x10100, // Aegean Numbers
    0x10140, // unassigned
    0x10300, // Old Italic
    0x10330, // Gothic
    0x10350, // unassigned
    0x10380, // Ugaritic
    0x103A0, // unassigned
    0x10400, // Deseret
    0x10450, // Shavian
    0x10480, // Osmanya
    0x104B0, // unassigned
    0x10800, // Cypriot Syllabary
    0x10840, // unassigned
    0x1D000, // Byzantine Musical Symbols
    0x1D100, // Musical Symbols
    0x1D200, // unassigned
    0x1D300, // Tai Xuan Jing Symbols
    0x1D360, // unassigned
    0x1D400, // Mathematical Alphanumeric Symbols
    0x1D800, // unassigned
    0x20000, // CJK Unified Ideographs Extension B
    0x2A6E0, // unassigned
    0x2F800, // CJK Compatibility Ideographs Supplement
    0x2FA20, // unassigned
    0xE0000, // Tags
    0xE0080, // unassigned
    0xE0100, // Variation Selectors Supplement
    0xE01F0, // unassigned
    0xF0000, // Supplementary Private Use Area-A
    0x100000, // Supplementary Private Use Area-B
};
1796
/**
 * Block constant for each entry of <code>blockStarts</code>, index for
 * index: <code>blocks[i]</code> is the block whose range begins at
 * <code>blockStarts[i]</code>. A <code>null</code> entry stands for a
 * range that belongs to no block; {@link #of(int)} returns that
 * <code>null</code> for code points falling in such a range.
 */
private static final UnicodeBlock[] blocks = {
    BASIC_LATIN,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    CYRILLIC_SUPPLEMENTARY,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    null, // unassigned range starting at 0x0750
    THAANA,
    null, // unassigned range starting at 0x07C0
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    null, // unassigned range starting at 0x1380
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    TAGALOG,
    HANUNOO,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    null, // unassigned range starting at 0x18B0
    LIMBU,
    TAI_LE,
    null, // unassigned range starting at 0x1980
    KHMER_SYMBOLS,
    null, // unassigned range starting at 0x1A00
    PHONETIC_EXTENSIONS,
    null, // unassigned range starting at 0x1D80
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    SUPPLEMENTAL_ARROWS_A,
    BRAILLE_PATTERNS,
    SUPPLEMENTAL_ARROWS_B,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    null, // unassigned range starting at 0x2C00
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    null, // unassigned range starting at 0x2FE0
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    null, // unassigned range starting at 0x31C0
    KATAKANA_PHONETIC_EXTENSIONS,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    YIJING_HEXAGRAM_SYMBOLS,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    null, // unassigned range starting at 0xA4D0
    HANGUL_SYLLABLES,
    null, // unassigned range starting at 0xD7B0
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    VARIATION_SELECTORS,
    null, // unassigned range starting at 0xFE10
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SPECIALS,
    LINEAR_B_SYLLABARY,
    LINEAR_B_IDEOGRAMS,
    AEGEAN_NUMBERS,
    null, // unassigned range starting at 0x10140
    OLD_ITALIC,
    GOTHIC,
    null, // unassigned range starting at 0x10350
    UGARITIC,
    null, // unassigned range starting at 0x103A0
    DESERET,
    SHAVIAN,
    OSMANYA,
    null, // unassigned range starting at 0x104B0
    CYPRIOT_SYLLABARY,
    null, // unassigned range starting at 0x10840
    BYZANTINE_MUSICAL_SYMBOLS,
    MUSICAL_SYMBOLS,
    null, // unassigned range starting at 0x1D200
    TAI_XUAN_JING_SYMBOLS,
    null, // unassigned range starting at 0x1D360
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    null, // unassigned range starting at 0x1D800
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    null, // unassigned range starting at 0x2A6E0
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    null, // unassigned range starting at 0x2FA20
    TAGS,
    null, // unassigned range starting at 0xE0080
    VARIATION_SELECTORS_SUPPLEMENT,
    null, // unassigned range starting at 0xE01F0
    SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    SUPPLEMENTARY_PRIVATE_USE_AREA_B
};
1949
1950
1951 /**
1952 * Returns the object representing the Unicode block containing the
1953 * given character, or <code>null</code> if the character is not a
1954 * member of a defined block.
1955 *
1956 * <p><b>Note:</b> This method cannot handle <a
1957 * href="Character.html#supplementary"> supplementary
1958 * characters</a>. To support all Unicode characters,
1959 * including supplementary characters, use the {@link
1960 * #of(int)} method.
1961 *
1962 * @param c The character in question
1963 * @return The <code>UnicodeBlock</code> instance representing the
1964 * Unicode block of which this character is a member, or
1965 * <code>null</code> if the character is not a member of any
1966 * Unicode block
1967 */
1968 public static UnicodeBlock of(char c) {
1969 return of((int)c);
1970 }
1971
1972
1973 /**
1974 * Returns the object representing the Unicode block
1975 * containing the given character (Unicode code point), or
1976 * <code>null</code> if the character is not a member of a
1977 * defined block.
1978 *
1979 * @param codePoint the character (Unicode code point) in question.
1980 * @return The <code>UnicodeBlock</code> instance representing the
1981 * Unicode block of which this character is a member, or
1982 * <code>null</code> if the character is not a member of any
1983 * Unicode block
1984 * @exception IllegalArgumentException if the specified
1985 * <code>codePoint</code> is an invalid Unicode code point.
1986 * @see Character#isValidCodePoint(int)
1987 * @since 1.5
1988 */
1989 public static UnicodeBlock of(int codePoint) {
1990 if (!isValidCodePoint(codePoint)) {
1991 throw new IllegalArgumentException();
1992 }
1993
1994 int top, bottom, current;
1995 bottom = 0;
1996 top = blockStarts.length;
1997 current = top/2;
1998
1999 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
2000 while (top - bottom > 1) {
2001 if (codePoint >= blockStarts[current]) {
2002 bottom = current;
2003 } else {
2004 top = current;
2005 }
2006 current = (top + bottom) / 2;
2007 }
2008 return blocks[current];
2009 }
2010
2011 /**
2012 * Returns the UnicodeBlock with the given name. Block
2013 * names are determined by The Unicode Standard. The file
2014 * Blocks-<version>.txt defines blocks for a particular
2015 * version of the standard. The {@link Character} class specifies
2016 * the version of the standard that it supports.
2017 * <p>
2018 * This method accepts block names in the following forms:
2019 * <ol>
2020 * <li> Canonical block names as defined by the Unicode Standard.
2021 * For example, the standard defines a "Basic Latin" block. Therefore, this
2022 * method accepts "Basic Latin" as a valid block name. The documentation of
2023 * each UnicodeBlock provides the canonical name.
2024 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2025 * is a valid block name for the "Basic Latin" block.
2026 * <li>The text representation of each constant UnicodeBlock identifier.
2027 * For example, this method will return the {@link #BASIC_LATIN} block if
2028 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2029 * hyphens in the canonical name with underscores.
2030 * </ol>
2031 * Finally, character case is ignored for all of the valid block name forms.
2032 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2033 * The en_US locale's case mapping rules are used to provide case-insensitive
2034 * string comparisons for block name validation.
2035 * <p>
2036 * If the Unicode Standard changes block names, both the previous and
2037 * current names will be accepted.
2038 *
2039 * @param blockName A <code>UnicodeBlock</code> name.
2040 * @return The <code>UnicodeBlock</code> instance identified
2041 * by <code>blockName</code>
2042 * @throws IllegalArgumentException if <code>blockName</code> is an
2043 * invalid name
2044 * @throws NullPointerException if <code>blockName</code> is null
2045 * @since 1.5
2046 */
2047 public static final UnicodeBlock forName(String blockName) {
2048 UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2049 if (block == null) {
2050 throw new IllegalArgumentException();
2051 }
2052 return block;
2053 }
2054 }
2055
2056
2057 /**
2058 * The value of the <code>Character</code>.
2059 *
2060 * @serial
2061 */
private final char value;

/** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
private static final long serialVersionUID = 3786198910865385080L;
2066
2067 /**
2068 * Constructs a newly allocated <code>Character</code> object that
2069 * represents the specified <code>char</code> value.
2070 *
2071 * @param value the value to be represented by the
2072 * <code>Character</code> object.
2073 */
public Character(char value) {
    // Record the wrapped char; the field is final, so this is its only assignment.
    this.value = value;
}
2077
2078 private static class CharacterCache {
2079 private CharacterCache(){}
2080
2081 static final Character cache[] = new Character[127 + 1];
2082
2083 static {
2084 for(int i = 0; i < cache.length; i++)
2085 cache[i] = new Character((char)i);
2086 }
2087 }
2088
2089 /**
2090 * Returns a <tt>Character</tt> instance representing the specified
2091 * <tt>char</tt> value.
2092 * If a new <tt>Character</tt> instance is not required, this method
2093 * should generally be used in preference to the constructor
2094 * {@link #Character(char)}, as this method is likely to yield
2095 * significantly better space and time performance by caching
2096 * frequently requested values.
2097 *
2098 * @param c a char value.
2099 * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2100 * @since 1.5
2101 */
2102 public static Character valueOf(char c) {
2103 if(c <= 127) { // must cache
2104 return CharacterCache.cache[(int)c];
2105 }
2106 return new Character(c);
2107 }
2108
2109 /**
2110 * Returns the value of this <code>Character</code> object.
2111 * @return the primitive <code>char</code> value represented by
2112 * this object.
2113 */
2114 public char charValue() {
2115 return value;
2116 }
2117
2118 /**
2119 * Returns a hash code for this <code>Character</code>.
2120 * @return a hash code value for this object.
2121 */
2122 public int hashCode() {
2123 return (int)value;
2124 }
2125
2126 /**
2127 * Compares this object against the specified object.
2128 * The result is <code>true</code> if and only if the argument is not
2129 * <code>null</code> and is a <code>Character</code> object that
2130 * represents the same <code>char</code> value as this object.
2131 *
2132 * @param obj the object to compare with.
2133 * @return <code>true</code> if the objects are the same;
2134 * <code>false</code> otherwise.
2135 */
2136 public boolean equals(Object obj) {
2137 if (obj instanceof Character) {
2138 return value == ((Character)obj).charValue();
2139 }
2140 return false;
2141 }
2142
2143 /**
2144 * Returns a <code>String</code> object representing this
2145 * <code>Character</code>'s value. The result is a string of
2146 * length 1 whose sole component is the primitive
2147 * <code>char</code> value represented by this
2148 * <code>Character</code> object.
2149 *
2150 * @return a string representation of this object.
2151 */
2152 public String toString() {
2153 char buf[] = {value};
2154 return String.valueOf(buf);
2155 }
2156
2157 /**
2158 * Returns a <code>String</code> object representing the
2159 * specified <code>char</code>. The result is a string of length
2160 * 1 consisting solely of the specified <code>char</code>.
2161 *
2162 * @param c the <code>char</code> to be converted
2163 * @return the string representation of the specified <code>char</code>
2164 * @since 1.4
2165 */
2166 public static String toString(char c) {
2167 return String.valueOf(c);
2168 }
2169
2170 /**
2171 * Determines whether the specified code point is a valid Unicode
2172 * code point value in the range of <code>0x0000</code> to
2173 * <code>0x10FFFF</code> inclusive. This method is equivalent to
2174 * the expression:
2175 *
2176 * <blockquote><pre>
2177 * codePoint >= 0x0000 && codePoint <= 0x10FFFF
2178 * </pre></blockquote>
2179 *
2180 * @param codePoint the Unicode code point to be tested
2181 * @return <code>true</code> if the specified code point value
2182 * is a valid code point value;
2183 * <code>false</code> otherwise.
2184 * @since 1.5
2185 */
2186 public static boolean isValidCodePoint(int codePoint) {
2187 return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2188 }
2189
2190 /**
2191 * Determines whether the specified character (Unicode code point)
2192 * is in the supplementary character range. The method call is
2193 * equivalent to the expression:
2194 * <blockquote><pre>
2195 * codePoint >= 0x10000 && codePoint <= 0x10FFFF
2196 * </pre></blockquote>
2197 *
2198 * @param codePoint the character (Unicode code point) to be tested
2199 * @return <code>true</code> if the specified character is in the Unicode
2200 * supplementary character range; <code>false</code> otherwise.
2201 * @since 1.5
2202 */
2203 public static boolean isSupplementaryCodePoint(int codePoint) {
2204 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2205 && codePoint <= MAX_CODE_POINT;
2206 }
2207
2208 /**
2209 * Determines if the given <code>char</code> value is a
2210 * high-surrogate code unit (also known as <i>leading-surrogate
2211 * code unit</i>). Such values do not represent characters by
2212 * themselves, but are used in the representation of <a
2213 * href="#supplementary">supplementary characters</a> in the
2214 * UTF-16 encoding.
2215 *
2216 * <p>This method returns <code>true</code> if and only if
2217 * <blockquote><pre>ch >= '\uD800' && ch <= '\uDBFF'
2218 * </pre></blockquote>
2219 * is <code>true</code>.
2220 *
2221 * @param ch the <code>char</code> value to be tested.
2222 * @return <code>true</code> if the <code>char</code> value
2223 * is between '\uD800' and '\uDBFF' inclusive;
2224 * <code>false</code> otherwise.
2225 * @see java.lang.Character#isLowSurrogate(char)
2226 * @see Character.UnicodeBlock#of(int)
2227 * @since 1.5
2228 */
2229 public static boolean isHighSurrogate(char ch) {
2230 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2231 }
2232
2233 /**
2234 * Determines if the given <code>char</code> value is a
2235 * low-surrogate code unit (also known as <i>trailing-surrogate code
2236 * unit</i>). Such values do not represent characters by themselves,
2237 * but are used in the representation of <a
2238 * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2239 *
2240 * <p> This method returns <code>true</code> if and only if
2241 * <blockquote><pre>ch >= '\uDC00' && ch <= '\uDFFF'
2242 * </pre></blockquote> is <code>true</code>.
2243 *
2244 * @param ch the <code>char</code> value to be tested.
2245 * @return <code>true</code> if the <code>char</code> value
2246 * is between '\uDC00' and '\uDFFF' inclusive;
2247 * <code>false</code> otherwise.
2248 * @see java.lang.Character#isHighSurrogate(char)
2249 * @since 1.5
2250 */
2251 public static boolean isLowSurrogate(char ch) {
2252 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2253 }
2254
2255 /**
2256 * Determines whether the specified pair of <code>char</code>
2257 * values is a valid surrogate pair. This method is equivalent to
2258 * the expression:
2259 * <blockquote><pre>
2260 * isHighSurrogate(high) && isLowSurrogate(low)
2261 * </pre></blockquote>
2262 *
2263 * @param high the high-surrogate code value to be tested
2264 * @param low the low-surrogate code value to be tested
2265 * @return <code>true</code> if the specified high and
2266 * low-surrogate code values represent a valid surrogate pair;
2267 * <code>false</code> otherwise.
2268 * @since 1.5
2269 */
2270 public static boolean isSurrogatePair(char high, char low) {
2271 return isHighSurrogate(high) && isLowSurrogate(low);
2272 }
2273
2274 /**
2275 * Determines the number of <code>char</code> values needed to
2276 * represent the specified character (Unicode code point). If the
2277 * specified character is equal to or greater than 0x10000, then
2278 * the method returns 2. Otherwise, the method returns 1.
2279 *
2280 * <p>This method doesn't validate the specified character to be a
2281 * valid Unicode code point. The caller must validate the
2282 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2283 * if necessary.
2284 *
2285 * @param codePoint the character (Unicode code point) to be tested.
 * @return 2 if the character is equal to or greater than 0x10000; 1 otherwise.
2287 * @see #isSupplementaryCodePoint(int)
2288 * @since 1.5
2289 */
2290 public static int charCount(int codePoint) {
2291 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2292 }
2293
2294 /**
2295 * Converts the specified surrogate pair to its supplementary code
2296 * point value. This method does not validate the specified
2297 * surrogate pair. The caller must validate it using {@link
2298 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2299 *
2300 * @param high the high-surrogate code unit
2301 * @param low the low-surrogate code unit
2302 * @return the supplementary code point composed from the
2303 * specified surrogate pair.
2304 * @since 1.5
2305 */
2306 public static int toCodePoint(char high, char low) {
2307 return ((high - MIN_HIGH_SURROGATE) << 10)
2308 + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
2309 }
2310
2311 /**
2312 * Returns the code point at the given index of the
2313 * <code>CharSequence</code>. If the <code>char</code> value at
2314 * the given index in the <code>CharSequence</code> is in the
2315 * high-surrogate range, the following index is less than the
2316 * length of the <code>CharSequence</code>, and the
2317 * <code>char</code> value at the following index is in the
2318 * low-surrogate range, then the supplementary code point
2319 * corresponding to this surrogate pair is returned. Otherwise,
2320 * the <code>char</code> value at the given index is returned.
2321 *
2322 * @param seq a sequence of <code>char</code> values (Unicode code
2323 * units)
2324 * @param index the index to the <code>char</code> values (Unicode
2325 * code units) in <code>seq</code> to be converted
2326 * @return the Unicode code point at the given index
2327 * @exception NullPointerException if <code>seq</code> is null.
2328 * @exception IndexOutOfBoundsException if the value
2329 * <code>index</code> is negative or not less than
2330 * {@link CharSequence#length() seq.length()}.
2331 * @since 1.5
2332 */
2333 public static int codePointAt(CharSequence seq, int index) {
2334 char c1 = seq.charAt(index++);
2335 if (isHighSurrogate(c1)) {
2336 if (index < seq.length()) {
2337 char c2 = seq.charAt(index);
2338 if (isLowSurrogate(c2)) {
2339 return toCodePoint(c1, c2);
2340 }
2341 }
2342 }
2343 return c1;
2344 }
2345
2346 /**
2347 * Returns the code point at the given index of the
2348 * <code>char</code> array. If the <code>char</code> value at
2349 * the given index in the <code>char</code> array is in the
2350 * high-surrogate range, the following index is less than the
2351 * length of the <code>char</code> array, and the
2352 * <code>char</code> value at the following index is in the
2353 * low-surrogate range, then the supplementary code point
2354 * corresponding to this surrogate pair is returned. Otherwise,
2355 * the <code>char</code> value at the given index is returned.
2356 *
2357 * @param a the <code>char</code> array
2358 * @param index the index to the <code>char</code> values (Unicode
2359 * code units) in the <code>char</code> array to be converted
2360 * @return the Unicode code point at the given index
2361 * @exception NullPointerException if <code>a</code> is null.
2362 * @exception IndexOutOfBoundsException if the value
2363 * <code>index</code> is negative or not less than
2364 * the length of the <code>char</code> array.
2365 * @since 1.5
2366 */
2367 public static int codePointAt(char[] a, int index) {
2368 return codePointAtImpl(a, index, a.length);
2369 }
2370
2371 /**
2372 * Returns the code point at the given index of the
2373 * <code>char</code> array, where only array elements with
2374 * <code>index</code> less than <code>limit</code> can be used. If
2375 * the <code>char</code> value at the given index in the
2376 * <code>char</code> array is in the high-surrogate range, the
2377 * following index is less than the <code>limit</code>, and the
2378 * <code>char</code> value at the following index is in the
2379 * low-surrogate range, then the supplementary code point
2380 * corresponding to this surrogate pair is returned. Otherwise,
2381 * the <code>char</code> value at the given index is returned.
2382 *
2383 * @param a the <code>char</code> array
2384 * @param index the index to the <code>char</code> values (Unicode
2385 * code units) in the <code>char</code> array to be converted
2386 * @param limit the index after the last array element that can be used in the
2387 * <code>char</code> array
2388 * @return the Unicode code point at the given index
2389 * @exception NullPointerException if <code>a</code> is null.
2390 * @exception IndexOutOfBoundsException if the <code>index</code>
2391 * argument is negative or not less than the <code>limit</code>
2392 * argument, or if the <code>limit</code> argument is negative or
2393 * greater than the length of the <code>char</code> array.
2394 * @since 1.5
2395 */
2396 public static int codePointAt(char[] a, int index, int limit) {
2397 if (index >= limit || limit < 0 || limit > a.length) {
2398 throw new IndexOutOfBoundsException();
2399 }
2400 return codePointAtImpl(a, index, limit);
2401 }
2402
2403 static int codePointAtImpl(char[] a, int index, int limit) {
2404 char c1 = a[index++];
2405 if (isHighSurrogate(c1)) {
2406 if (index < limit) {
2407 char c2 = a[index];
2408 if (isLowSurrogate(c2)) {
2409 return toCodePoint(c1, c2);
2410 }
2411 }
2412 }
2413 return c1;
2414 }
2415
2416 /**
2417 * Returns the code point preceding the given index of the
2418 * <code>CharSequence</code>. If the <code>char</code> value at
2419 * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2420 * the low-surrogate range, <code>(index - 2)</code> is not
2421 * negative, and the <code>char</code> value at <code>(index -
2422 * 2)</code> in the <code>CharSequence</code> is in the
2423 * high-surrogate range, then the supplementary code point
2424 * corresponding to this surrogate pair is returned. Otherwise,
2425 * the <code>char</code> value at <code>(index - 1)</code> is
2426 * returned.
2427 *
2428 * @param seq the <code>CharSequence</code> instance
2429 * @param index the index following the code point that should be returned
2430 * @return the Unicode code point value before the given index.
2431 * @exception NullPointerException if <code>seq</code> is null.
2432 * @exception IndexOutOfBoundsException if the <code>index</code>
2433 * argument is less than 1 or greater than {@link
2434 * CharSequence#length() seq.length()}.
2435 * @since 1.5
2436 */
2437 public static int codePointBefore(CharSequence seq, int index) {
2438 char c2 = seq.charAt(--index);
2439 if (isLowSurrogate(c2)) {
2440 if (index > 0) {
2441 char c1 = seq.charAt(--index);
2442 if (isHighSurrogate(c1)) {
2443 return toCodePoint(c1, c2);
2444 }
2445 }
2446 }
2447 return c2;
2448 }
2449
2450 /**
2451 * Returns the code point preceding the given index of the
2452 * <code>char</code> array. If the <code>char</code> value at
2453 * <code>(index - 1)</code> in the <code>char</code> array is in
2454 * the low-surrogate range, <code>(index - 2)</code> is not
2455 * negative, and the <code>char</code> value at <code>(index -
2456 * 2)</code> in the <code>char</code> array is in the
2457 * high-surrogate range, then the supplementary code point
2458 * corresponding to this surrogate pair is returned. Otherwise,
2459 * the <code>char</code> value at <code>(index - 1)</code> is
2460 * returned.
2461 *
2462 * @param a the <code>char</code> array
2463 * @param index the index following the code point that should be returned
2464 * @return the Unicode code point value before the given index.
2465 * @exception NullPointerException if <code>a</code> is null.
2466 * @exception IndexOutOfBoundsException if the <code>index</code>
2467 * argument is less than 1 or greater than the length of the
2468 * <code>char</code> array
2469 * @since 1.5
2470 */
2471 public static int codePointBefore(char[] a, int index) {
2472 return codePointBeforeImpl(a, index, 0);
2473 }
2474
2475 /**
2476 * Returns the code point preceding the given index of the
2477 * <code>char</code> array, where only array elements with
2478 * <code>index</code> greater than or equal to <code>start</code>
2479 * can be used. If the <code>char</code> value at <code>(index -
2480 * 1)</code> in the <code>char</code> array is in the
2481 * low-surrogate range, <code>(index - 2)</code> is not less than
2482 * <code>start</code>, and the <code>char</code> value at
2483 * <code>(index - 2)</code> in the <code>char</code> array is in
2484 * the high-surrogate range, then the supplementary code point
2485 * corresponding to this surrogate pair is returned. Otherwise,
2486 * the <code>char</code> value at <code>(index - 1)</code> is
2487 * returned.
2488 *
2489 * @param a the <code>char</code> array
2490 * @param index the index following the code point that should be returned
2491 * @param start the index of the first array element in the
2492 * <code>char</code> array
2493 * @return the Unicode code point value before the given index.
2494 * @exception NullPointerException if <code>a</code> is null.
2495 * @exception IndexOutOfBoundsException if the <code>index</code>
2496 * argument is not greater than the <code>start</code> argument or
2497 * is greater than the length of the <code>char</code> array, or
2498 * if the <code>start</code> argument is negative or not less than
2499 * the length of the <code>char</code> array.
2500 * @since 1.5
2501 */
2502 public static int codePointBefore(char[] a, int index, int start) {
2503 if (index <= start || start < 0 || start >= a.length) {
2504 throw new IndexOutOfBoundsException();
2505 }
2506 return codePointBeforeImpl(a, index, start);
2507 }
2508
2509 static int codePointBeforeImpl(char[] a, int index, int start) {
2510 char c2 = a[--index];
2511 if (isLowSurrogate(c2)) {
2512 if (index > start) {
2513 char c1 = a[--index];
2514 if (isHighSurrogate(c1)) {
2515 return toCodePoint(c1, c2);
2516 }
2517 }
2518 }
2519 return c2;
2520 }
2521
2522 /**
2523 * Converts the specified character (Unicode code point) to its
2524 * UTF-16 representation. If the specified code point is a BMP
2525 * (Basic Multilingual Plane or Plane 0) value, the same value is
2526 * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
2527 * specified code point is a supplementary character, its
2528 * surrogate values are stored in <code>dst[dstIndex]</code>
2529 * (high-surrogate) and <code>dst[dstIndex+1]</code>
2530 * (low-surrogate), and 2 is returned.
2531 *
2532 * @param codePoint the character (Unicode code point) to be converted.
2533 * @param dst an array of <code>char</code> in which the
2534 * <code>codePoint</code>'s UTF-16 value is stored.
2535 * @param dstIndex the start index into the <code>dst</code>
2536 * array where the converted value is stored.
2537 * @return 1 if the code point is a BMP code point, 2 if the
2538 * code point is a supplementary code point.
2539 * @exception IllegalArgumentException if the specified
2540 * <code>codePoint</code> is not a valid Unicode code point.
2541 * @exception NullPointerException if the specified <code>dst</code> is null.
2542 * @exception IndexOutOfBoundsException if <code>dstIndex</code>
2543 * is negative or not less than <code>dst.length</code>, or if
2544 * <code>dst</code> at <code>dstIndex</code> doesn't have enough
2545 * array element(s) to store the resulting <code>char</code>
2546 * value(s). (If <code>dstIndex</code> is equal to
2547 * <code>dst.length-1</code> and the specified
2548 * <code>codePoint</code> is a supplementary character, the
2549 * high-surrogate value is not stored in
2550 * <code>dst[dstIndex]</code>.)
2551 * @since 1.5
2552 */
2553 public static int toChars(int codePoint, char[] dst, int dstIndex) {
2554 if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2555 throw new IllegalArgumentException();
2556 }
2557 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2558 dst[dstIndex] = (char) codePoint;
2559 return 1;
2560 }
2561 toSurrogates(codePoint, dst, dstIndex);
2562 return 2;
2563 }
2564
2565 /**
2566 * Converts the specified character (Unicode code point) to its
2567 * UTF-16 representation stored in a <code>char</code> array. If
2568 * the specified code point is a BMP (Basic Multilingual Plane or
2569 * Plane 0) value, the resulting <code>char</code> array has
2570 * the same value as <code>codePoint</code>. If the specified code
2571 * point is a supplementary code point, the resulting
2572 * <code>char</code> array has the corresponding surrogate pair.
2573 *
2574 * @param codePoint a Unicode code point
2575 * @return a <code>char</code> array having
2576 * <code>codePoint</code>'s UTF-16 representation.
2577 * @exception IllegalArgumentException if the specified
2578 * <code>codePoint</code> is not a valid Unicode code point.
2579 * @since 1.5
2580 */
2581 public static char[] toChars(int codePoint) {
2582 if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2583 throw new IllegalArgumentException();
2584 }
2585 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2586 return new char[] { (char) codePoint };
2587 }
2588 char[] result = new char[2];
2589 toSurrogates(codePoint, result, 0);
2590 return result;
2591 }
2592
static void toSurrogates(int codePoint, char[] dst, int index) {
    // Distance of the code point from the first supplementary code point.
    int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
    // The element at index+1 is stored first: if that position is out of
    // bounds the store throws before dst[index] has been modified, which is
    // the behavior the public toChars(int, char[], int) documents.
    dst[index+1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE);
    // The bits above the low 10 select the high surrogate.
    dst[index] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE);
}
2598
2599 /**
2600 * Returns the number of Unicode code points in the text range of
2601 * the specified char sequence. The text range begins at the
2602 * specified <code>beginIndex</code> and extends to the
2603 * <code>char</code> at index <code>endIndex - 1</code>. Thus the
2604 * length (in <code>char</code>s) of the text range is
2605 * <code>endIndex-beginIndex</code>. Unpaired surrogates within
2606 * the text range count as one code point each.
2607 *
2608 * @param seq the char sequence
2609 * @param beginIndex the index to the first <code>char</code> of
2610 * the text range.
2611 * @param endIndex the index after the last <code>char</code> of
2612 * the text range.
2613 * @return the number of Unicode code points in the specified text
2614 * range
2615 * @exception NullPointerException if <code>seq</code> is null.
2616 * @exception IndexOutOfBoundsException if the
2617 * <code>beginIndex</code> is negative, or <code>endIndex</code>
2618 * is larger than the length of the given sequence, or
2619 * <code>beginIndex</code> is larger than <code>endIndex</code>.
2620 * @since 1.5
2621 */
2622 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
2623 int length = seq.length();
2624 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
2625 throw new IndexOutOfBoundsException();
2626 }
2627 int n = 0;
2628 for (int i = beginIndex; i < endIndex; ) {
2629 n++;
2630 if (isHighSurrogate(seq.charAt(i++))) {
2631 if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
2632 i++;
2633 }
2634 }
2635 }
2636 return n;
2637 }
2638
2639 /**
2640 * Returns the number of Unicode code points in a subarray of the
2641 * <code>char</code> array argument. The <code>offset</code>
2642 * argument is the index of the first <code>char</code> of the
2643 * subarray and the <code>count</code> argument specifies the
2644 * length of the subarray in <code>char</code>s. Unpaired
2645 * surrogates within the subarray count as one code point each.
2646 *
2647 * @param a the <code>char</code> array
2648 * @param offset the index of the first <code>char</code> in the
2649 * given <code>char</code> array
2650 * @param count the length of the subarray in <code>char</code>s
2651 * @return the number of Unicode code points in the specified subarray
2652 * @exception NullPointerException if <code>a</code> is null.
2653 * @exception IndexOutOfBoundsException if <code>offset</code> or
2654 * <code>count</code> is negative, or if <code>offset +
2655 * count</code> is larger than the length of the given array.
2656 * @since 1.5
2657 */
2658 public static int codePointCount(char[] a, int offset, int count) {
2659 if (count > a.length - offset || offset < 0 || count < 0) {
2660 throw new IndexOutOfBoundsException();
2661 }
2662 return codePointCountImpl(a, offset, count);
2663 }
2664
2665 static int codePointCountImpl(char[] a, int offset, int count) {
2666 int endIndex = offset + count;
2667 int n = 0;
2668 for (int i = offset; i < endIndex; ) {
2669 n++;
2670 if (isHighSurrogate(a[i++])) {
2671 if (i < endIndex && isLowSurrogate(a[i])) {
2672 i++;
2673 }
2674 }
2675 }
2676 return n;
2677 }
2678
2679 /**
2680 * Returns the index within the given char sequence that is offset
2681 * from the given <code>index</code> by <code>codePointOffset</code>
2682 * code points. Unpaired surrogates within the text range given by
2683 * <code>index</code> and <code>codePointOffset</code> count as
2684 * one code point each.
2685 *
2686 * @param seq the char sequence
2687 * @param index the index to be offset
2688 * @param codePointOffset the offset in code points
2689 * @return the index within the char sequence
2690 * @exception NullPointerException if <code>seq</code> is null.
2691 * @exception IndexOutOfBoundsException if <code>index</code>
 * is negative or larger than the length of the char sequence,
2693 * or if <code>codePointOffset</code> is positive and the
2694 * subsequence starting with <code>index</code> has fewer than
2695 * <code>codePointOffset</code> code points, or if
2696 * <code>codePointOffset</code> is negative and the subsequence
2697 * before <code>index</code> has fewer than the absolute value
2698 * of <code>codePointOffset</code> code points.
2699 * @since 1.5
2700 */
2701 public static int offsetByCodePoints(CharSequence seq, int index,
2702 int codePointOffset) {
2703 int length = seq.length();
2704 if (index < 0 || index > length) {
2705 throw new IndexOutOfBoundsException();
2706 }
2707
2708 int x = index;
2709 if (codePointOffset >= 0) {
2710 int i;
2711 for (i = 0; x < length && i < codePointOffset; i++) {
2712 if (isHighSurrogate(seq.charAt(x++))) {
2713 if (x < length && isLowSurrogate(seq.charAt(x))) {
2714 x++;
2715 }
2716 }
2717 }
2718 if (i < codePointOffset) {
2719 throw new IndexOutOfBoundsException();
2720 }
2721 } else {
2722 int i;
2723 for (i = codePointOffset; x > 0 && i < 0; i++) {
2724 if (isLowSurrogate(seq.charAt(--x))) {
2725 if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
2726 x--;
2727 }
2728 }
2729 }
2730 if (i < 0) {
2731 throw new IndexOutOfBoundsException();
2732 }
2733 }
2734 return x;
2735 }
2736
2737 /**
2738 * Returns the index within the given <code>char</code> subarray
2739 * that is offset from the given <code>index</code> by
2740 * <code>codePointOffset</code> code points. The
2741 * <code>start</code> and <code>count</code> arguments specify a
2742 * subarray of the <code>char</code> array. Unpaired surrogates
2743 * within the text range given by <code>index</code> and
2744 * <code>codePointOffset</code> count as one code point each.
2745 *
2746 * @param a the <code>char</code> array
2747 * @param start the index of the first <code>char</code> of the
2748 * subarray
2749 * @param count the length of the subarray in <code>char</code>s
2750 * @param index the index to be offset
2751 * @param codePointOffset the offset in code points
2752 * @return the index within the subarray
2753 * @exception NullPointerException if <code>a</code> is null.
2754 * @exception IndexOutOfBoundsException
2755 * if <code>start</code> or <code>count</code> is negative,
2756 * or if <code>start + count</code> is larger than the length of
2757 * the given array,
2758 * or if <code>index</code> is less than <code>start</code> or
 * larger than <code>start + count</code>,
2760 * or if <code>codePointOffset</code> is positive and the text range
2761 * starting with <code>index</code> and ending with <code>start
2762 * + count - 1</code> has fewer than <code>codePointOffset</code> code
2763 * points,
2764 * or if <code>codePointOffset</code> is negative and the text range
2765 * starting with <code>start</code> and ending with <code>index
2766 * - 1</code> has fewer than the absolute value of
2767 * <code>codePointOffset</code> code points.
2768 * @since 1.5
2769 */
2770 public static int offsetByCodePoints(char[] a, int start, int count,
2771 int index, int codePointOffset) {
2772 if (count > a.length-start || start < 0 || count < 0
2773 || index < start || index > start+count) {
2774 throw new IndexOutOfBoundsException();
2775 }
2776 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
2777 }
2778
2779 static int offsetByCodePointsImpl(char[]a, int start, int count,
2780 int index, int codePointOffset) {
2781 int x = index;
2782 if (codePointOffset >= 0) {
2783 int limit = start + count;
2784 int i;
2785 for (i = 0; x < limit && i < codePointOffset; i++) {
2786 if (isHighSurrogate(a[x++])) {
2787 if (x < limit && isLowSurrogate(a[x])) {
2788 x++;
2789 }
2790 }
2791 }
2792 if (i < codePointOffset) {
2793 throw new IndexOutOfBoundsException();
2794 }
2795 } else {
2796 int i;
2797 for (i = codePointOffset; x > start && i < 0; i++) {
2798 if (isLowSurrogate(a[--x])) {
2799 if (x > start && isHighSurrogate(a[x-1])) {
2800 x--;
2801 }
2802 }
2803 }
2804 if (i < 0) {
2805 throw new IndexOutOfBoundsException();
2806 }
2807 }
2808 return x;
2809 }
2810
2811 /**
2812 * Determines if the specified character is a lowercase character.
2813 * <p>
2814 * A character is lowercase if its general category type, provided
2815 * by <code>Character.getType(ch)</code>, is
2816 * <code>LOWERCASE_LETTER</code>.
2817 * <p>
2818 * The following are examples of lowercase characters:
2819 * <p><blockquote><pre>
2820 * a b c d e f g h i j k l m n o p q r s t u v w x y z
2821 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2822 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2823 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2824 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2825 * </pre></blockquote>
2826 * <p> Many other Unicode characters are lowercase too.
2827 *
2828 * <p><b>Note:</b> This method cannot handle <a
2829 * href="#supplementary"> supplementary characters</a>. To support
2830 * all Unicode characters, including supplementary characters, use
2831 * the {@link #isLowerCase(int)} method.
2832 *
2833 * @param ch the character to be tested.
2834 * @return <code>true</code> if the character is lowercase;
2835 * <code>false</code> otherwise.
2836 * @see java.lang.Character#isLowerCase(char)
2837 * @see java.lang.Character#isTitleCase(char)
2838 * @see java.lang.Character#toLowerCase(char)
2839 * @see java.lang.Character#getType(char)
2840 */
2841 public static boolean isLowerCase(char ch) {
2842 return isLowerCase((int)ch);
2843 }
2844
2845 /**
2846 * Determines if the specified character (Unicode code point) is a
2847 * lowercase character.
2848 * <p>
2849 * A character is lowercase if its general category type, provided
2850 * by {@link Character#getType getType(codePoint)}, is
2851 * <code>LOWERCASE_LETTER</code>.
2852 * <p>
2853 * The following are examples of lowercase characters:
2854 * <p><blockquote><pre>
2855 * a b c d e f g h i j k l m n o p q r s t u v w x y z
2856 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2857 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2858 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2859 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2860 * </pre></blockquote>
2861 * <p> Many other Unicode characters are lowercase too.
2862 *
2863 * @param codePoint the character (Unicode code point) to be tested.
2864 * @return <code>true</code> if the character is lowercase;
2865 * <code>false</code> otherwise.
2866 * @see java.lang.Character#isLowerCase(int)
2867 * @see java.lang.Character#isTitleCase(int)
2868 * @see java.lang.Character#toLowerCase(int)
2869 * @see java.lang.Character#getType(int)
2870 * @since 1.5
2871 */
2872 public static boolean isLowerCase(int codePoint) {
2873 return getType(codePoint) == Character.LOWERCASE_LETTER;
2874 }
2875
2876 /**
2877 * Determines if the specified character is an uppercase character.
2878 * <p>
2879 * A character is uppercase if its general category type, provided by
2880 * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
2881 * <p>
2882 * The following are examples of uppercase characters:
2883 * <p><blockquote><pre>
2884 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2885 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2886 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2887 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2888 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2889 * </pre></blockquote>
2890 * <p> Many other Unicode characters are uppercase too.<p>
2891 *
2892 * <p><b>Note:</b> This method cannot handle <a
2893 * href="#supplementary"> supplementary characters</a>. To support
2894 * all Unicode characters, including supplementary characters, use
2895 * the {@link #isUpperCase(int)} method.
2896 *
2897 * @param ch the character to be tested.
2898 * @return <code>true</code> if the character is uppercase;
2899 * <code>false</code> otherwise.
2900 * @see java.lang.Character#isLowerCase(char)
2901 * @see java.lang.Character#isTitleCase(char)
2902 * @see java.lang.Character#toUpperCase(char)
2903 * @see java.lang.Character#getType(char)
2904 * @since 1.0
2905 */
2906 public static boolean isUpperCase(char ch) {
2907 return isUpperCase((int)ch);
2908 }
2909
2910 /**
2911 * Determines if the specified character (Unicode code point) is an uppercase character.
2912 * <p>
2913 * A character is uppercase if its general category type, provided by
2914 * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
2915 * <p>
2916 * The following are examples of uppercase characters:
2917 * <p><blockquote><pre>
2918 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2919 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2920 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2921 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2922 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2923 * </pre></blockquote>
2924 * <p> Many other Unicode characters are uppercase too.<p>
2925 *
2926 * @param codePoint the character (Unicode code point) to be tested.
2927 * @return <code>true</code> if the character is uppercase;
2928 * <code>false</code> otherwise.
2929 * @see java.lang.Character#isLowerCase(int)
2930 * @see java.lang.Character#isTitleCase(int)
2931 * @see java.lang.Character#toUpperCase(int)
2932 * @see java.lang.Character#getType(int)
2933 * @since 1.5
2934 */
2935 public static boolean isUpperCase(int codePoint) {
2936 return getType(codePoint) == Character.UPPERCASE_LETTER;
2937 }
2938
2939 /**
2940 * Determines if the specified character is a titlecase character.
2941 * <p>
2942 * A character is a titlecase character if its general
2943 * category type, provided by <code>Character.getType(ch)</code>,
2944 * is <code>TITLECASE_LETTER</code>.
2945 * <p>
2946 * Some characters look like pairs of Latin letters. For example, there
2947 * is an uppercase letter that looks like "LJ" and has a corresponding
2948 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2949 * is the appropriate form to use when rendering a word in lowercase
2950 * with initial capitals, as for a book title.
2951 * <p>
2952 * These are some of the Unicode characters for which this method returns
2953 * <code>true</code>:
2954 * <ul>
2955 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2956 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2957 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2958 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
2959 * </ul>
2960 * <p> Many other Unicode characters are titlecase too.<p>
2961 *
2962 * <p><b>Note:</b> This method cannot handle <a
2963 * href="#supplementary"> supplementary characters</a>. To support
2964 * all Unicode characters, including supplementary characters, use
2965 * the {@link #isTitleCase(int)} method.
2966 *
2967 * @param ch the character to be tested.
2968 * @return <code>true</code> if the character is titlecase;
2969 * <code>false</code> otherwise.
2970 * @see java.lang.Character#isLowerCase(char)
2971 * @see java.lang.Character#isUpperCase(char)
2972 * @see java.lang.Character#toTitleCase(char)
2973 * @see java.lang.Character#getType(char)
2974 * @since 1.0.2
2975 */
2976 public static boolean isTitleCase(char ch) {
2977 return isTitleCase((int)ch);
2978 }
2979
2980 /**
2981 * Determines if the specified character (Unicode code point) is a titlecase character.
2982 * <p>
2983 * A character is a titlecase character if its general
2984 * category type, provided by {@link Character#getType(int) getType(codePoint)},
2985 * is <code>TITLECASE_LETTER</code>.
2986 * <p>
2987 * Some characters look like pairs of Latin letters. For example, there
2988 * is an uppercase letter that looks like "LJ" and has a corresponding
2989 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2990 * is the appropriate form to use when rendering a word in lowercase
2991 * with initial capitals, as for a book title.
2992 * <p>
2993 * These are some of the Unicode characters for which this method returns
2994 * <code>true</code>:
2995 * <ul>
2996 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2997 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2998 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2999 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3000 * </ul>
3001 * <p> Many other Unicode characters are titlecase too.<p>
3002 *
3003 * @param codePoint the character (Unicode code point) to be tested.
3004 * @return <code>true</code> if the character is titlecase;
3005 * <code>false</code> otherwise.
3006 * @see java.lang.Character#isLowerCase(int)
3007 * @see java.lang.Character#isUpperCase(int)
3008 * @see java.lang.Character#toTitleCase(int)
3009 * @see java.lang.Character#getType(int)
3010 * @since 1.5
3011 */
3012 public static boolean isTitleCase(int codePoint) {
3013 return getType(codePoint) == Character.TITLECASE_LETTER;
3014 }
3015
3016 /**
3017 * Determines if the specified character is a digit.
3018 * <p>
3019 * A character is a digit if its general category type, provided
3020 * by <code>Character.getType(ch)</code>, is
3021 * <code>DECIMAL_DIGIT_NUMBER</code>.
3022 * <p>
3023 * Some Unicode character ranges that contain digits:
3024 * <ul>
3025 * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
3026 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3027 * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
3028 * Arabic-Indic digits
3029 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
3030 * Extended Arabic-Indic digits
3031 * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
3032 * Devanagari digits
3033 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
3034 * Fullwidth digits
3035 * </ul>
3036 *
3037 * Many other character ranges contain digits as well.
3038 *
3039 * <p><b>Note:</b> This method cannot handle <a
3040 * href="#supplementary"> supplementary characters</a>. To support
3041 * all Unicode characters, including supplementary characters, use
3042 * the {@link #isDigit(int)} method.
3043 *
3044 * @param ch the character to be tested.
3045 * @return <code>true</code> if the character is a digit;
3046 * <code>false</code> otherwise.
3047 * @see java.lang.Character#digit(char, int)
3048 * @see java.lang.Character#forDigit(int, int)
3049 * @see java.lang.Character#getType(char)
3050 */
3051 public static boolean isDigit(char ch) {
3052 return isDigit((int)ch);
3053 }
3054
3055 /**
3056 * Determines if the specified character (Unicode code point) is a digit.
3057 * <p>
3058 * A character is a digit if its general category type, provided
3059 * by {@link Character#getType(int) getType(codePoint)}, is
3060 * <code>DECIMAL_DIGIT_NUMBER</code>.
3061 * <p>
3062 * Some Unicode character ranges that contain digits:
3063 * <ul>
3064 * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
3065 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3066 * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
3067 * Arabic-Indic digits
3068 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
3069 * Extended Arabic-Indic digits
3070 * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
3071 * Devanagari digits
3072 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
3073 * Fullwidth digits
3074 * </ul>
3075 *
3076 * Many other character ranges contain digits as well.
3077 *
3078 * @param codePoint the character (Unicode code point) to be tested.
3079 * @return <code>true</code> if the character is a digit;
3080 * <code>false</code> otherwise.
3081 * @see java.lang.Character#forDigit(int, int)
3082 * @see java.lang.Character#getType(int)
3083 * @since 1.5
3084 */
3085 public static boolean isDigit(int codePoint) {
3086 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3087 }
3088
3089 /**
3090 * Determines if a character is defined in Unicode.
3091 * <p>
3092 * A character is defined if at least one of the following is true:
3093 * <ul>
3094 * <li>It has an entry in the UnicodeData file.
3095 * <li>It has a value in a range defined by the UnicodeData file.
3096 * </ul>
3097 *
3098 * <p><b>Note:</b> This method cannot handle <a
3099 * href="#supplementary"> supplementary characters</a>. To support
3100 * all Unicode characters, including supplementary characters, use
3101 * the {@link #isDefined(int)} method.
3102 *
3103 * @param ch the character to be tested
3104 * @return <code>true</code> if the character has a defined meaning
3105 * in Unicode; <code>false</code> otherwise.
3106 * @see java.lang.Character#isDigit(char)
3107 * @see java.lang.Character#isLetter(char)
3108 * @see java.lang.Character#isLetterOrDigit(char)
3109 * @see java.lang.Character#isLowerCase(char)
3110 * @see java.lang.Character#isTitleCase(char)
3111 * @see java.lang.Character#isUpperCase(char)
3112 * @since 1.0.2
3113 */
3114 public static boolean isDefined(char ch) {
3115 return isDefined((int)ch);
3116 }
3117
3118 /**
3119 * Determines if a character (Unicode code point) is defined in Unicode.
3120 * <p>
3121 * A character is defined if at least one of the following is true:
3122 * <ul>
3123 * <li>It has an entry in the UnicodeData file.
3124 * <li>It has a value in a range defined by the UnicodeData file.
3125 * </ul>
3126 *
3127 * @param codePoint the character (Unicode code point) to be tested.
3128 * @return <code>true</code> if the character has a defined meaning
3129 * in Unicode; <code>false</code> otherwise.
3130 * @see java.lang.Character#isDigit(int)
3131 * @see java.lang.Character#isLetter(int)
3132 * @see java.lang.Character#isLetterOrDigit(int)
3133 * @see java.lang.Character#isLowerCase(int)
3134 * @see java.lang.Character#isTitleCase(int)
3135 * @see java.lang.Character#isUpperCase(int)
3136 * @since 1.5
3137 */
3138 public static boolean isDefined(int codePoint) {
3139 return getType(codePoint) != Character.UNASSIGNED;
3140 }
3141
3142 /**
3143 * Determines if the specified character is a letter.
3144 * <p>
3145 * A character is considered to be a letter if its general
3146 * category type, provided by <code>Character.getType(ch)</code>,
3147 * is any of the following:
3148 * <ul>
3149 * <li> <code>UPPERCASE_LETTER</code>
3150 * <li> <code>LOWERCASE_LETTER</code>
3151 * <li> <code>TITLECASE_LETTER</code>
3152 * <li> <code>MODIFIER_LETTER</code>
3153 * <li> <code>OTHER_LETTER</code>
3154 * </ul>
3155 *
3156 * Not all letters have case. Many characters are
3157 * letters but are neither uppercase nor lowercase nor titlecase.
3158 *
3159 * <p><b>Note:</b> This method cannot handle <a
3160 * href="#supplementary"> supplementary characters</a>. To support
3161 * all Unicode characters, including supplementary characters, use
3162 * the {@link #isLetter(int)} method.
3163 *
3164 * @param ch the character to be tested.
3165 * @return <code>true</code> if the character is a letter;
3166 * <code>false</code> otherwise.
3167 * @see java.lang.Character#isDigit(char)
3168 * @see java.lang.Character#isJavaIdentifierStart(char)
3169 * @see java.lang.Character#isJavaLetter(char)
3170 * @see java.lang.Character#isJavaLetterOrDigit(char)
3171 * @see java.lang.Character#isLetterOrDigit(char)
3172 * @see java.lang.Character#isLowerCase(char)
3173 * @see java.lang.Character#isTitleCase(char)
3174 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3175 * @see java.lang.Character#isUpperCase(char)
3176 */
3177 public static boolean isLetter(char ch) {
3178 return isLetter((int)ch);
3179 }
3180
3181 /**
3182 * Determines if the specified character (Unicode code point) is a letter.
3183 * <p>
3184 * A character is considered to be a letter if its general
3185 * category type, provided by {@link Character#getType(int) getType(codePoint)},
3186 * is any of the following:
3187 * <ul>
3188 * <li> <code>UPPERCASE_LETTER</code>
3189 * <li> <code>LOWERCASE_LETTER</code>
3190 * <li> <code>TITLECASE_LETTER</code>
3191 * <li> <code>MODIFIER_LETTER</code>
3192 * <li> <code>OTHER_LETTER</code>
3193 * </ul>
3194 *
3195 * Not all letters have case. Many characters are
3196 * letters but are neither uppercase nor lowercase nor titlecase.
3197 *
3198 * @param codePoint the character (Unicode code point) to be tested.
3199 * @return <code>true</code> if the character is a letter;
3200 * <code>false</code> otherwise.
3201 * @see java.lang.Character#isDigit(int)
3202 * @see java.lang.Character#isJavaIdentifierStart(int)
3203 * @see java.lang.Character#isLetterOrDigit(int)
3204 * @see java.lang.Character#isLowerCase(int)
3205 * @see java.lang.Character#isTitleCase(int)
3206 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3207 * @see java.lang.Character#isUpperCase(int)
3208 * @since 1.5
3209 */
3210 public static boolean isLetter(int codePoint) {
3211 return ((((1 << Character.UPPERCASE_LETTER) |
3212 (1 << Character.LOWERCASE_LETTER) |
3213 (1 << Character.TITLECASE_LETTER) |
3214 (1 << Character.MODIFIER_LETTER) |
3215 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
3216 != 0;
3217 }
3218
3219 /**
3220 * Determines if the specified character is a letter or digit.
3221 * <p>
3222 * A character is considered to be a letter or digit if either
3223 * <code>Character.isLetter(char ch)</code> or
3224 * <code>Character.isDigit(char ch)</code> returns
3225 * <code>true</code> for the character.
3226 *
3227 * <p><b>Note:</b> This method cannot handle <a
3228 * href="#supplementary"> supplementary characters</a>. To support
3229 * all Unicode characters, including supplementary characters, use
3230 * the {@link #isLetterOrDigit(int)} method.
3231 *
3232 * @param ch the character to be tested.
3233 * @return <code>true</code> if the character is a letter or digit;
3234 * <code>false</code> otherwise.
3235 * @see java.lang.Character#isDigit(char)
3236 * @see java.lang.Character#isJavaIdentifierPart(char)
3237 * @see java.lang.Character#isJavaLetter(char)
3238 * @see java.lang.Character#isJavaLetterOrDigit(char)
3239 * @see java.lang.Character#isLetter(char)
3240 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3241 * @since 1.0.2
3242 */
3243 public static boolean isLetterOrDigit(char ch) {
3244 return isLetterOrDigit((int)ch);
3245 }
3246
3247 /**
3248 * Determines if the specified character (Unicode code point) is a letter or digit.
3249 * <p>
3250 * A character is considered to be a letter or digit if either
3251 * {@link #isLetter(int) isLetter(codePoint)} or
3252 * {@link #isDigit(int) isDigit(codePoint)} returns
3253 * <code>true</code> for the character.
3254 *
3255 * @param codePoint the character (Unicode code point) to be tested.
3256 * @return <code>true</code> if the character is a letter or digit;
3257 * <code>false</code> otherwise.
3258 * @see java.lang.Character#isDigit(int)
3259 * @see java.lang.Character#isJavaIdentifierPart(int)
3260 * @see java.lang.Character#isLetter(int)
3261 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3262 * @since 1.5
3263 */
3264 public static boolean isLetterOrDigit(int codePoint) {
3265 return ((((1 << Character.UPPERCASE_LETTER) |
3266 (1 << Character.LOWERCASE_LETTER) |
3267 (1 << Character.TITLECASE_LETTER) |
3268 (1 << Character.MODIFIER_LETTER) |
3269 (1 << Character.OTHER_LETTER) |
3270 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
3271 != 0;
3272 }
3273
3274 /**
3275 * Determines if the specified character is permissible as the first
3276 * character in a Java identifier.
3277 * <p>
3278 * A character may start a Java identifier if and only if
3279 * one of the following is true:
3280 * <ul>
3281 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3282 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3283 * <li> ch is a currency symbol (such as "$")
3284 * <li> ch is a connecting punctuation character (such as "_").
3285 * </ul>
3286 *
3287 * @param ch the character to be tested.
3288 * @return <code>true</code> if the character may start a Java
3289 * identifier; <code>false</code> otherwise.
3290 * @see java.lang.Character#isJavaLetterOrDigit(char)
3291 * @see java.lang.Character#isJavaIdentifierStart(char)
3292 * @see java.lang.Character#isJavaIdentifierPart(char)
3293 * @see java.lang.Character#isLetter(char)
3294 * @see java.lang.Character#isLetterOrDigit(char)
3295 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3296 * @since 1.02
3297 * @deprecated Replaced by isJavaIdentifierStart(char).
3298 */
3299 @Deprecated
3300 public static boolean isJavaLetter(char ch) {
3301 return isJavaIdentifierStart(ch);
3302 }
3303
3304 /**
3305 * Determines if the specified character may be part of a Java
3306 * identifier as other than the first character.
3307 * <p>
3308 * A character may be part of a Java identifier if and only if any
3309 * of the following are true:
3310 * <ul>
3311 * <li> it is a letter
3312 * <li> it is a currency symbol (such as <code>'$'</code>)
3313 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3314 * <li> it is a digit
3315 * <li> it is a numeric letter (such as a Roman numeral character)
3316 * <li> it is a combining mark
3317 * <li> it is a non-spacing mark
3318 * <li> <code>isIdentifierIgnorable</code> returns
3319 * <code>true</code> for the character.
3320 * </ul>
3321 *
3322 * @param ch the character to be tested.
3323 * @return <code>true</code> if the character may be part of a
3324 * Java identifier; <code>false</code> otherwise.
3325 * @see java.lang.Character#isJavaLetter(char)
3326 * @see java.lang.Character#isJavaIdentifierStart(char)
3327 * @see java.lang.Character#isJavaIdentifierPart(char)
3328 * @see java.lang.Character#isLetter(char)
3329 * @see java.lang.Character#isLetterOrDigit(char)
3330 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3331 * @see java.lang.Character#isIdentifierIgnorable(char)
3332 * @since 1.02
3333 * @deprecated Replaced by isJavaIdentifierPart(char).
3334 */
3335 @Deprecated
3336 public static boolean isJavaLetterOrDigit(char ch) {
3337 return isJavaIdentifierPart(ch);
3338 }
3339
3340 /**
3341 * Determines if the specified character is
3342 * permissible as the first character in a Java identifier.
3343 * <p>
3344 * A character may start a Java identifier if and only if
3345 * one of the following conditions is true:
3346 * <ul>
3347 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3348 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3349 * <li> ch is a currency symbol (such as "$")
3350 * <li> ch is a connecting punctuation character (such as "_").
3351 * </ul>
3352 *
3353 * <p><b>Note:</b> This method cannot handle <a
3354 * href="#supplementary"> supplementary characters</a>. To support
3355 * all Unicode characters, including supplementary characters, use
3356 * the {@link #isJavaIdentifierStart(int)} method.
3357 *
3358 * @param ch the character to be tested.
3359 * @return <code>true</code> if the character may start a Java identifier;
3360 * <code>false</code> otherwise.
3361 * @see java.lang.Character#isJavaIdentifierPart(char)
3362 * @see java.lang.Character#isLetter(char)
3363 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3364 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3365 * @since 1.1
3366 */
3367 public static boolean isJavaIdentifierStart(char ch) {
3368 return isJavaIdentifierStart((int)ch);
3369 }
3370
3371 /**
3372 * Determines if the character (Unicode code point) is
3373 * permissible as the first character in a Java identifier.
3374 * <p>
3375 * A character may start a Java identifier if and only if
3376 * one of the following conditions is true:
3377 * <ul>
3378 * <li> {@link #isLetter(int) isLetter(codePoint)}
3379 * returns <code>true</code>
3380 * <li> {@link #getType(int) getType(codePoint)}
3381 * returns <code>LETTER_NUMBER</code>
3382 * <li> the referenced character is a currency symbol (such as "$")
3383 * <li> the referenced character is a connecting punctuation character
3384 * (such as "_").
3385 * </ul>
3386 *
3387 * @param codePoint the character (Unicode code point) to be tested.
3388 * @return <code>true</code> if the character may start a Java identifier;
3389 * <code>false</code> otherwise.
3390 * @see java.lang.Character#isJavaIdentifierPart(int)
3391 * @see java.lang.Character#isLetter(int)
3392 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3393 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3394 * @since 1.5
3395 */
3396 public static boolean isJavaIdentifierStart(int codePoint) {
3397 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
3398 }
3399
3400 /**
3401 * Determines if the specified character may be part of a Java
3402 * identifier as other than the first character.
3403 * <p>
3404 * A character may be part of a Java identifier if any of the following
3405 * are true:
3406 * <ul>
3407 * <li> it is a letter
3408 * <li> it is a currency symbol (such as <code>'$'</code>)
3409 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3410 * <li> it is a digit
3411 * <li> it is a numeric letter (such as a Roman numeral character)
3412 * <li> it is a combining mark
3413 * <li> it is a non-spacing mark
3414 * <li> <code>isIdentifierIgnorable</code> returns
3415 * <code>true</code> for the character
3416 * </ul>
3417 *
3418 * <p><b>Note:</b> This method cannot handle <a
3419 * href="#supplementary"> supplementary characters</a>. To support
3420 * all Unicode characters, including supplementary characters, use
3421 * the {@link #isJavaIdentifierPart(int)} method.
3422 *
3423 * @param ch the character to be tested.
3424 * @return <code>true</code> if the character may be part of a
3425 * Java identifier; <code>false</code> otherwise.
3426 * @see java.lang.Character#isIdentifierIgnorable(char)
3427 * @see java.lang.Character#isJavaIdentifierStart(char)
3428 * @see java.lang.Character#isLetterOrDigit(char)
3429 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3430 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3431 * @since 1.1
3432 */
3433 public static boolean isJavaIdentifierPart(char ch) {
3434 return isJavaIdentifierPart((int)ch);
3435 }
3436
3437 /**
3438 * Determines if the character (Unicode code point) may be part of a Java
3439 * identifier as other than the first character.
3440 * <p>
3441 * A character may be part of a Java identifier if any of the following
3442 * are true:
3443 * <ul>
3444 * <li> it is a letter
3445 * <li> it is a currency symbol (such as <code>'$'</code>)
3446 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3447 * <li> it is a digit
3448 * <li> it is a numeric letter (such as a Roman numeral character)
3449 * <li> it is a combining mark
3450 * <li> it is a non-spacing mark
3451 * <li> {@link #isIdentifierIgnorable(int)
3452 * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3453 * the character
3454 * </ul>
3455 *
3456 * @param codePoint the character (Unicode code point) to be tested.
3457 * @return <code>true</code> if the character may be part of a
3458 * Java identifier; <code>false</code> otherwise.
3459 * @see java.lang.Character#isIdentifierIgnorable(int)
3460 * @see java.lang.Character#isJavaIdentifierStart(int)
3461 * @see java.lang.Character#isLetterOrDigit(int)
3462 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3463 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3464 * @since 1.5
3465 */
3466 public static boolean isJavaIdentifierPart(int codePoint) {
3467 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
3468 }
3469
3470 /**
3471 * Determines if the specified character is permissible as the
3472 * first character in a Unicode identifier.
3473 * <p>
3474 * A character may start a Unicode identifier if and only if
3475 * one of the following conditions is true:
3476 * <ul>
3477 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3478 * <li> {@link #getType(char) getType(ch)} returns
3479 * <code>LETTER_NUMBER</code>.
3480 * </ul>
3481 *
3482 * <p><b>Note:</b> This method cannot handle <a
3483 * href="#supplementary"> supplementary characters</a>. To support
3484 * all Unicode characters, including supplementary characters, use
3485 * the {@link #isUnicodeIdentifierStart(int)} method.
3486 *
3487 * @param ch the character to be tested.
3488 * @return <code>true</code> if the character may start a Unicode
3489 * identifier; <code>false</code> otherwise.
3490 * @see java.lang.Character#isJavaIdentifierStart(char)
3491 * @see java.lang.Character#isLetter(char)
3492 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3493 * @since 1.1
3494 */
3495 public static boolean isUnicodeIdentifierStart(char ch) {
3496 return isUnicodeIdentifierStart((int)ch);
3497 }
3498
3499 /**
3500 * Determines if the specified character (Unicode code point) is permissible as the
3501 * first character in a Unicode identifier.
3502 * <p>
3503 * A character may start a Unicode identifier if and only if
3504 * one of the following conditions is true:
3505 * <ul>
3506 * <li> {@link #isLetter(int) isLetter(codePoint)}
3507 * returns <code>true</code>
3508 * <li> {@link #getType(int) getType(codePoint)}
3509 * returns <code>LETTER_NUMBER</code>.
3510 * </ul>
3511 * @param codePoint the character (Unicode code point) to be tested.
3512 * @return <code>true</code> if the character may start a Unicode
3513 * identifier; <code>false</code> otherwise.
3514 * @see java.lang.Character#isJavaIdentifierStart(int)
3515 * @see java.lang.Character#isLetter(int)
3516 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3517 * @since 1.5
3518 */
3519 public static boolean isUnicodeIdentifierStart(int codePoint) {
3520 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
3521 }
3522
3523 /**
3524 * Determines if the specified character may be part of a Unicode
3525 * identifier as other than the first character.
3526 * <p>
3527 * A character may be part of a Unicode identifier if and only if
3528 * one of the following statements is true:
3529 * <ul>
3530 * <li> it is a letter
3531 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3532 * <li> it is a digit
3533 * <li> it is a numeric letter (such as a Roman numeral character)
3534 * <li> it is a combining mark
3535 * <li> it is a non-spacing mark
3536 * <li> <code>isIdentifierIgnorable</code> returns
3537 * <code>true</code> for this character.
3538 * </ul>
3539 *
3540 * <p><b>Note:</b> This method cannot handle <a
3541 * href="#supplementary"> supplementary characters</a>. To support
3542 * all Unicode characters, including supplementary characters, use
3543 * the {@link #isUnicodeIdentifierPart(int)} method.
3544 *
3545 * @param ch the character to be tested.
3546 * @return <code>true</code> if the character may be part of a
3547 * Unicode identifier; <code>false</code> otherwise.
3548 * @see java.lang.Character#isIdentifierIgnorable(char)
3549 * @see java.lang.Character#isJavaIdentifierPart(char)
3550 * @see java.lang.Character#isLetterOrDigit(char)
3551 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3552 * @since 1.1
3553 */
3554 public static boolean isUnicodeIdentifierPart(char ch) {
3555 return isUnicodeIdentifierPart((int)ch);
3556 }
3557
3558 /**
3559 * Determines if the specified character (Unicode code point) may be part of a Unicode
3560 * identifier as other than the first character.
3561 * <p>
3562 * A character may be part of a Unicode identifier if and only if
3563 * one of the following statements is true:
3564 * <ul>
3565 * <li> it is a letter
3566 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3567 * <li> it is a digit
3568 * <li> it is a numeric letter (such as a Roman numeral character)
3569 * <li> it is a combining mark
3570 * <li> it is a non-spacing mark
3571 * <li> <code>isIdentifierIgnorable</code> returns
3572 * <code>true</code> for this character.
3573 * </ul>
3574 * @param codePoint the character (Unicode code point) to be tested.
3575 * @return <code>true</code> if the character may be part of a
3576 * Unicode identifier; <code>false</code> otherwise.
3577 * @see java.lang.Character#isIdentifierIgnorable(int)
3578 * @see java.lang.Character#isJavaIdentifierPart(int)
3579 * @see java.lang.Character#isLetterOrDigit(int)
3580 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3581 * @since 1.5
3582 */
3583 public static boolean isUnicodeIdentifierPart(int codePoint) {
3584 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
3585 }
3586
3587 /**
3588 * Determines if the specified character should be regarded as
3589 * an ignorable character in a Java identifier or a Unicode identifier.
3590 * <p>
3591 * The following Unicode characters are ignorable in a Java identifier
3592 * or a Unicode identifier:
3593 * <ul>
3594 * <li>ISO control characters that are not whitespace
3595 * <ul>
3596 * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3597 * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3598 * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3599 * </ul>
3600 *
3601 * <li>all characters that have the <code>FORMAT</code> general
3602 * category value
3603 * </ul>
3604 *
3605 * <p><b>Note:</b> This method cannot handle <a
3606 * href="#supplementary"> supplementary characters</a>. To support
3607 * all Unicode characters, including supplementary characters, use
3608 * the {@link #isIdentifierIgnorable(int)} method.
3609 *
3610 * @param ch the character to be tested.
3611 * @return <code>true</code> if the character is an ignorable control
3612 * character that may be part of a Java or Unicode identifier;
3613 * <code>false</code> otherwise.
3614 * @see java.lang.Character#isJavaIdentifierPart(char)
3615 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3616 * @since 1.1
3617 */
3618 public static boolean isIdentifierIgnorable(char ch) {
3619 return isIdentifierIgnorable((int)ch);
3620 }
3621
3622 /**
3623 * Determines if the specified character (Unicode code point) should be regarded as
3624 * an ignorable character in a Java identifier or a Unicode identifier.
3625 * <p>
3626 * The following Unicode characters are ignorable in a Java identifier
3627 * or a Unicode identifier:
3628 * <ul>
3629 * <li>ISO control characters that are not whitespace
3630 * <ul>
3631 * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3632 * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3633 * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3634 * </ul>
3635 *
3636 * <li>all characters that have the <code>FORMAT</code> general
3637 * category value
3638 * </ul>
3639 *
3640 * @param codePoint the character (Unicode code point) to be tested.
3641 * @return <code>true</code> if the character is an ignorable control
3642 * character that may be part of a Java or Unicode identifier;
3643 * <code>false</code> otherwise.
3644 * @see java.lang.Character#isJavaIdentifierPart(int)
3645 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3646 * @since 1.5
3647 */
3648 public static boolean isIdentifierIgnorable(int codePoint) {
3649 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
3650 }
3651
3652 /**
3653 * Converts the character argument to lowercase using case
3654 * mapping information from the UnicodeData file.
3655 * <p>
3656 * Note that
3657 * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
3658 * does not always return <code>true</code> for some ranges of
3659 * characters, particularly those that are symbols or ideographs.
3660 *
3661 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3662 * characters to lowercase. <code>String</code> case mapping methods
3663 * have several benefits over <code>Character</code> case mapping methods.
3664 * <code>String</code> case mapping methods can perform locale-sensitive
3665 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3666 * the <code>Character</code> case mapping methods cannot.
3667 *
3668 * <p><b>Note:</b> This method cannot handle <a
3669 * href="#supplementary"> supplementary characters</a>. To support
3670 * all Unicode characters, including supplementary characters, use
3671 * the {@link #toLowerCase(int)} method.
3672 *
3673 * @param ch the character to be converted.
3674 * @return the lowercase equivalent of the character, if any;
3675 * otherwise, the character itself.
3676 * @see java.lang.Character#isLowerCase(char)
3677 * @see java.lang.String#toLowerCase()
3678 */
3679 public static char toLowerCase(char ch) {
3680 return (char)toLowerCase((int)ch);
3681 }
3682
3683 /**
3684 * Converts the character (Unicode code point) argument to
3685 * lowercase using case mapping information from the UnicodeData
3686 * file.
3687 *
3688 * <p> Note that
3689 * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
3690 * does not always return <code>true</code> for some ranges of
3691 * characters, particularly those that are symbols or ideographs.
3692 *
3693 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3694 * characters to lowercase. <code>String</code> case mapping methods
3695 * have several benefits over <code>Character</code> case mapping methods.
3696 * <code>String</code> case mapping methods can perform locale-sensitive
3697 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3698 * the <code>Character</code> case mapping methods cannot.
3699 *
3700 * @param codePoint the character (Unicode code point) to be converted.
3701 * @return the lowercase equivalent of the character (Unicode code
3702 * point), if any; otherwise, the character itself.
3703 * @see java.lang.Character#isLowerCase(int)
3704 * @see java.lang.String#toLowerCase()
3705 *
3706 * @since 1.5
3707 */
3708 public static int toLowerCase(int codePoint) {
3709 return CharacterData.of(codePoint).toLowerCase(codePoint);
3710 }
3711
3712 /**
3713 * Converts the character argument to uppercase using case mapping
3714 * information from the UnicodeData file.
3715 * <p>
3716 * Note that
3717 * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
3718 * does not always return <code>true</code> for some ranges of
3719 * characters, particularly those that are symbols or ideographs.
3720 *
3721 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3722 * characters to uppercase. <code>String</code> case mapping methods
3723 * have several benefits over <code>Character</code> case mapping methods.
3724 * <code>String</code> case mapping methods can perform locale-sensitive
3725 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3726 * the <code>Character</code> case mapping methods cannot.
3727 *
3728 * <p><b>Note:</b> This method cannot handle <a
3729 * href="#supplementary"> supplementary characters</a>. To support
3730 * all Unicode characters, including supplementary characters, use
3731 * the {@link #toUpperCase(int)} method.
3732 *
3733 * @param ch the character to be converted.
3734 * @return the uppercase equivalent of the character, if any;
3735 * otherwise, the character itself.
3736 * @see java.lang.Character#isUpperCase(char)
3737 * @see java.lang.String#toUpperCase()
3738 */
3739 public static char toUpperCase(char ch) {
3740 return (char)toUpperCase((int)ch);
3741 }
3742
3743 /**
3744 * Converts the character (Unicode code point) argument to
3745 * uppercase using case mapping information from the UnicodeData
3746 * file.
3747 *
3748 * <p>Note that
3749 * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
3750 * does not always return <code>true</code> for some ranges of
3751 * characters, particularly those that are symbols or ideographs.
3752 *
3753 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3754 * characters to uppercase. <code>String</code> case mapping methods
3755 * have several benefits over <code>Character</code> case mapping methods.
3756 * <code>String</code> case mapping methods can perform locale-sensitive
3757 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3758 * the <code>Character</code> case mapping methods cannot.
3759 *
3760 * @param codePoint the character (Unicode code point) to be converted.
3761 * @return the uppercase equivalent of the character, if any;
3762 * otherwise, the character itself.
3763 * @see java.lang.Character#isUpperCase(int)
3764 * @see java.lang.String#toUpperCase()
3765 *
3766 * @since 1.5
3767 */
3768 public static int toUpperCase(int codePoint) {
3769 return CharacterData.of(codePoint).toUpperCase(codePoint);
3770 }
3771
3772 /**
3773 * Converts the character argument to titlecase using case mapping
3774 * information from the UnicodeData file. If a character has no
3775 * explicit titlecase mapping and is not itself a titlecase char
3776 * according to UnicodeData, then the uppercase mapping is
3777 * returned as an equivalent titlecase mapping. If the
3778 * <code>char</code> argument is already a titlecase
3779 * <code>char</code>, the same <code>char</code> value will be
3780 * returned.
3781 * <p>
3782 * Note that
3783 * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
3784 * does not always return <code>true</code> for some ranges of
3785 * characters.
3786 *
3787 * <p><b>Note:</b> This method cannot handle <a
3788 * href="#supplementary"> supplementary characters</a>. To support
3789 * all Unicode characters, including supplementary characters, use
3790 * the {@link #toTitleCase(int)} method.
3791 *
3792 * @param ch the character to be converted.
3793 * @return the titlecase equivalent of the character, if any;
3794 * otherwise, the character itself.
3795 * @see java.lang.Character#isTitleCase(char)
3796 * @see java.lang.Character#toLowerCase(char)
3797 * @see java.lang.Character#toUpperCase(char)
3798 * @since 1.0.2
3799 */
3800 public static char toTitleCase(char ch) {
3801 return (char)toTitleCase((int)ch);
3802 }
3803
3804 /**
3805 * Converts the character (Unicode code point) argument to titlecase using case mapping
3806 * information from the UnicodeData file. If a character has no
3807 * explicit titlecase mapping and is not itself a titlecase char
3808 * according to UnicodeData, then the uppercase mapping is
3809 * returned as an equivalent titlecase mapping. If the
3810 * character argument is already a titlecase
3811 * character, the same character value will be
3812 * returned.
3813 *
3814 * <p>Note that
3815 * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
3816 * does not always return <code>true</code> for some ranges of
3817 * characters.
3818 *
3819 * @param codePoint the character (Unicode code point) to be converted.
3820 * @return the titlecase equivalent of the character, if any;
3821 * otherwise, the character itself.
3822 * @see java.lang.Character#isTitleCase(int)
3823 * @see java.lang.Character#toLowerCase(int)
3824 * @see java.lang.Character#toUpperCase(int)
3825 * @since 1.5
3826 */
3827 public static int toTitleCase(int codePoint) {
3828 return CharacterData.of(codePoint).toTitleCase(codePoint);
3829 }
3830
3831 /**
3832 * Returns the numeric value of the character <code>ch</code> in the
3833 * specified radix.
3834 * <p>
3835 * If the radix is not in the range <code>MIN_RADIX</code> <=
3836 * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3837 * value of <code>ch</code> is not a valid digit in the specified
3838 * radix, <code>-1</code> is returned. A character is a valid digit
3839 * if at least one of the following is true:
3840 * <ul>
3841 * <li>The method <code>isDigit</code> is <code>true</code> of the character
3842 * and the Unicode decimal digit value of the character (or its
3843 * single-character decomposition) is less than the specified radix.
3844 * In this case the decimal digit value is returned.
3845 * <li>The character is one of the uppercase Latin letters
3846 * <code>'A'</code> through <code>'Z'</code> and its code is less than
3847 * <code>radix + 'A' - 10</code>.
3848 * In this case, <code>ch - 'A' + 10</code>
3849 * is returned.
3850 * <li>The character is one of the lowercase Latin letters
3851 * <code>'a'</code> through <code>'z'</code> and its code is less than
3852 * <code>radix + 'a' - 10</code>.
3853 * In this case, <code>ch - 'a' + 10</code>
3854 * is returned.
3855 * </ul>
3856 *
3857 * <p><b>Note:</b> This method cannot handle <a
3858 * href="#supplementary"> supplementary characters</a>. To support
3859 * all Unicode characters, including supplementary characters, use
3860 * the {@link #digit(int, int)} method.
3861 *
3862 * @param ch the character to be converted.
3863 * @param radix the radix.
3864 * @return the numeric value represented by the character in the
3865 * specified radix.
3866 * @see java.lang.Character#forDigit(int, int)
3867 * @see java.lang.Character#isDigit(char)
3868 */
3869 public static int digit(char ch, int radix) {
3870 return digit((int)ch, radix);
3871 }
3872
3873 /**
3874 * Returns the numeric value of the specified character (Unicode
3875 * code point) in the specified radix.
3876 *
3877 * <p>If the radix is not in the range <code>MIN_RADIX</code> <=
3878 * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3879 * character is not a valid digit in the specified
3880 * radix, <code>-1</code> is returned. A character is a valid digit
3881 * if at least one of the following is true:
3882 * <ul>
3883 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
3884 * and the Unicode decimal digit value of the character (or its
3885 * single-character decomposition) is less than the specified radix.
3886 * In this case the decimal digit value is returned.
3887 * <li>The character is one of the uppercase Latin letters
3888 * <code>'A'</code> through <code>'Z'</code> and its code is less than
3889 * <code>radix + 'A' - 10</code>.
3890 * In this case, <code>ch - 'A' + 10</code>
3891 * is returned.
3892 * <li>The character is one of the lowercase Latin letters
3893 * <code>'a'</code> through <code>'z'</code> and its code is less than
3894 * <code>radix + 'a' - 10</code>.
3895 * In this case, <code>ch - 'a' + 10</code>
3896 * is returned.
3897 * </ul>
3898 *
3899 * @param codePoint the character (Unicode code point) to be converted.
3900 * @param radix the radix.
3901 * @return the numeric value represented by the character in the
3902 * specified radix.
3903 * @see java.lang.Character#forDigit(int, int)
3904 * @see java.lang.Character#isDigit(int)
3905 * @since 1.5
3906 */
3907 public static int digit(int codePoint, int radix) {
3908 return CharacterData.of(codePoint).digit(codePoint, radix);
3909 }
3910
3911 /**
3912 * Returns the <code>int</code> value that the specified Unicode
3913 * character represents. For example, the character
3914 * <code>'\u216C'</code> (the roman numeral fifty) will return
3915 * an int with a value of 50.
3916 * <p>
3917 * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3918 * <code>'\u005A'</code>), lowercase
3919 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3920 * full width variant (<code>'\uFF21'</code> through
3921 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3922 * <code>'\uFF5A'</code>) forms have numeric values from 10
3923 * through 35. This is independent of the Unicode specification,
3924 * which does not assign numeric values to these <code>char</code>
3925 * values.
3926 * <p>
3927 * If the character does not have a numeric value, then -1 is returned.
3928 * If the character has a numeric value that cannot be represented as a
3929 * nonnegative integer (for example, a fractional value), then -2
3930 * is returned.
3931 *
3932 * <p><b>Note:</b> This method cannot handle <a
3933 * href="#supplementary"> supplementary characters</a>. To support
3934 * all Unicode characters, including supplementary characters, use
3935 * the {@link #getNumericValue(int)} method.
3936 *
3937 * @param ch the character to be converted.
3938 * @return the numeric value of the character, as a nonnegative <code>int</code>
3939 * value; -2 if the character has a numeric value that is not a
3940 * nonnegative integer; -1 if the character has no numeric value.
3941 * @see java.lang.Character#forDigit(int, int)
3942 * @see java.lang.Character#isDigit(char)
3943 * @since 1.1
3944 */
3945 public static int getNumericValue(char ch) {
3946 return getNumericValue((int)ch);
3947 }
3948
3949 /**
3950 * Returns the <code>int</code> value that the specified
3951 * character (Unicode code point) represents. For example, the character
3952 * <code>'\u216C'</code> (the Roman numeral fifty) will return
3953 * an <code>int</code> with a value of 50.
3954 * <p>
3955 * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3956 * <code>'\u005A'</code>), lowercase
3957 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3958 * full width variant (<code>'\uFF21'</code> through
3959 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3960 * <code>'\uFF5A'</code>) forms have numeric values from 10
3961 * through 35. This is independent of the Unicode specification,
3962 * which does not assign numeric values to these <code>char</code>
3963 * values.
3964 * <p>
3965 * If the character does not have a numeric value, then -1 is returned.
3966 * If the character has a numeric value that cannot be represented as a
3967 * nonnegative integer (for example, a fractional value), then -2
3968 * is returned.
3969 *
3970 * @param codePoint the character (Unicode code point) to be converted.
3971 * @return the numeric value of the character, as a nonnegative <code>int</code>
3972 * value; -2 if the character has a numeric value that is not a
3973 * nonnegative integer; -1 if the character has no numeric value.
3974 * @see java.lang.Character#forDigit(int, int)
3975 * @see java.lang.Character#isDigit(int)
3976 * @since 1.5
3977 */
3978 public static int getNumericValue(int codePoint) {
3979 return CharacterData.of(codePoint).getNumericValue(codePoint);
3980 }
3981
3982 /**
3983 * Determines if the specified character is ISO-LATIN-1 white space.
3984 * This method returns <code>true</code> for the following five
3985 * characters only:
3986 * <table>
3987 * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td>
3988 * <td><code>HORIZONTAL TABULATION</code></td></tr>
3989 * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td>
3990 * <td><code>NEW LINE</code></td></tr>
3991 * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td>
3992 * <td><code>FORM FEED</code></td></tr>
3993 * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td>
3994 * <td><code>CARRIAGE RETURN</code></td></tr>
3995 * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td>
3996 * <td><code>SPACE</code></td></tr>
3997 * </table>
3998 *
3999 * @param ch the character to be tested.
4000 * @return <code>true</code> if the character is ISO-LATIN-1 white
4001 * space; <code>false</code> otherwise.
4002 * @see java.lang.Character#isSpaceChar(char)
4003 * @see java.lang.Character#isWhitespace(char)
4004 * @deprecated Replaced by isWhitespace(char).
4005 */
4006 @Deprecated
4007 public static boolean isSpace(char ch) {
4008 return (ch <= 0x0020) &&
4009 (((((1L << 0x0009) |
4010 (1L << 0x000A) |
4011 (1L << 0x000C) |
4012 (1L << 0x000D) |
4013 (1L << 0x0020)) >> ch) & 1L) != 0);
4014 }
4015
4016
4017 /**
4018 * Determines if the specified character is a Unicode space character.
4019 * A character is considered to be a space character if and only if
4020 * it is specified to be a space character by the Unicode standard. This
4021 * method returns true if the character's general category type is any of
4022 * the following:
4023 * <ul>
4024 * <li> <code>SPACE_SEPARATOR</code>
4025 * <li> <code>LINE_SEPARATOR</code>
4026 * <li> <code>PARAGRAPH_SEPARATOR</code>
4027 * </ul>
4028 *
4029 * <p><b>Note:</b> This method cannot handle <a
4030 * href="#supplementary"> supplementary characters</a>. To support
4031 * all Unicode characters, including supplementary characters, use
4032 * the {@link #isSpaceChar(int)} method.
4033 *
4034 * @param ch the character to be tested.
4035 * @return <code>true</code> if the character is a space character;
4036 * <code>false</code> otherwise.
4037 * @see java.lang.Character#isWhitespace(char)
4038 * @since 1.1
4039 */
4040 public static boolean isSpaceChar(char ch) {
4041 return isSpaceChar((int)ch);
4042 }
4043
4044 /**
4045 * Determines if the specified character (Unicode code point) is a
4046 * Unicode space character. A character is considered to be a
4047 * space character if and only if it is specified to be a space
4048 * character by the Unicode standard. This method returns true if
4049 * the character's general category type is any of the following:
4050 *
4051 * <ul>
4052 * <li> {@link #SPACE_SEPARATOR}
4053 * <li> {@link #LINE_SEPARATOR}
4054 * <li> {@link #PARAGRAPH_SEPARATOR}
4055 * </ul>
4056 *
4057 * @param codePoint the character (Unicode code point) to be tested.
4058 * @return <code>true</code> if the character is a space character;
4059 * <code>false</code> otherwise.
4060 * @see java.lang.Character#isWhitespace(int)
4061 * @since 1.5
4062 */
4063 public static boolean isSpaceChar(int codePoint) {
4064 return ((((1 << Character.SPACE_SEPARATOR) |
4065 (1 << Character.LINE_SEPARATOR) |
4066 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
4067 != 0;
4068 }
4069
4070 /**
4071 * Determines if the specified character is white space according to Java.
4072 * A character is a Java whitespace character if and only if it satisfies
4073 * one of the following criteria:
4074 * <ul>
4075 * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4076 * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4077 * but is not also a non-breaking space (<code>'\u00A0'</code>,
4078 * <code>'\u2007'</code>, <code>'\u202F'</code>).
4079 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4080 * <li> It is <code>'\u000A'</code>, LINE FEED.
4081 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4082 * <li> It is <code>'\u000C'</code>, FORM FEED.
4083 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4084 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4085 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4086 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4087 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4088 * </ul>
4089 *
4090 * <p><b>Note:</b> This method cannot handle <a
4091 * href="#supplementary"> supplementary characters</a>. To support
4092 * all Unicode characters, including supplementary characters, use
4093 * the {@link #isWhitespace(int)} method.
4094 *
4095 * @param ch the character to be tested.
4096 * @return <code>true</code> if the character is a Java whitespace
4097 * character; <code>false</code> otherwise.
4098 * @see java.lang.Character#isSpaceChar(char)
4099 * @since 1.1
4100 */
4101 public static boolean isWhitespace(char ch) {
4102 return isWhitespace((int)ch);
4103 }
4104
4105 /**
4106 * Determines if the specified character (Unicode code point) is
4107 * white space according to Java. A character is a Java
4108 * whitespace character if and only if it satisfies one of the
4109 * following criteria:
4110 * <ul>
4111 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4112 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4113 * but is not also a non-breaking space (<code>'\u00A0'</code>,
4114 * <code>'\u2007'</code>, <code>'\u202F'</code>).
4115 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4116 * <li> It is <code>'\u000A'</code>, LINE FEED.
4117 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4118 * <li> It is <code>'\u000C'</code>, FORM FEED.
4119 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4120 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4121 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4122 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4123 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4124 * </ul>
4125 * <p>
4126 *
4127 * @param codePoint the character (Unicode code point) to be tested.
4128 * @return <code>true</code> if the character is a Java whitespace
4129 * character; <code>false</code> otherwise.
4130 * @see java.lang.Character#isSpaceChar(int)
4131 * @since 1.5
4132 */
4133 public static boolean isWhitespace(int codePoint) {
4134 return CharacterData.of(codePoint).isWhitespace(codePoint);
4135 }
4136
4137 /**
4138 * Determines if the specified character is an ISO control
4139 * character. A character is considered to be an ISO control
4140 * character if its code is in the range <code>'\u0000'</code>
4141 * through <code>'\u001F'</code> or in the range
4142 * <code>'\u007F'</code> through <code>'\u009F'</code>.
4143 *
4144 * <p><b>Note:</b> This method cannot handle <a
4145 * href="#supplementary"> supplementary characters</a>. To support
4146 * all Unicode characters, including supplementary characters, use
4147 * the {@link #isISOControl(int)} method.
4148 *
4149 * @param ch the character to be tested.
4150 * @return <code>true</code> if the character is an ISO control character;
4151 * <code>false</code> otherwise.
4152 *
4153 * @see java.lang.Character#isSpaceChar(char)
4154 * @see java.lang.Character#isWhitespace(char)
4155 * @since 1.1
4156 */
4157 public static boolean isISOControl(char ch) {
4158 return isISOControl((int)ch);
4159 }
4160
4161 /**
4162 * Determines if the referenced character (Unicode code point) is an ISO control
4163 * character. A character is considered to be an ISO control
4164 * character if its code is in the range <code>'\u0000'</code>
4165 * through <code>'\u001F'</code> or in the range
4166 * <code>'\u007F'</code> through <code>'\u009F'</code>.
4167 *
4168 * @param codePoint the character (Unicode code point) to be tested.
4169 * @return <code>true</code> if the character is an ISO control character;
4170 * <code>false</code> otherwise.
4171 * @see java.lang.Character#isSpaceChar(int)
4172 * @see java.lang.Character#isWhitespace(int)
4173 * @since 1.5
4174 */
4175 public static boolean isISOControl(int codePoint) {
4176 return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
4177 (codePoint >= 0x007F && codePoint <= 0x009F);
4178 }
4179
4180 /**
4181 * Returns a value indicating a character's general category.
4182 *
4183 * <p><b>Note:</b> This method cannot handle <a
4184 * href="#supplementary"> supplementary characters</a>. To support
4185 * all Unicode characters, including supplementary characters, use
4186 * the {@link #getType(int)} method.
4187 *
4188 * @param ch the character to be tested.
4189 * @return a value of type <code>int</code> representing the
4190 * character's general category.
4191 * @see java.lang.Character#COMBINING_SPACING_MARK
4192 * @see java.lang.Character#CONNECTOR_PUNCTUATION
4193 * @see java.lang.Character#CONTROL
4194 * @see java.lang.Character#CURRENCY_SYMBOL
4195 * @see java.lang.Character#DASH_PUNCTUATION
4196 * @see java.lang.Character#DECIMAL_DIGIT_NUMBER
4197 * @see java.lang.Character#ENCLOSING_MARK
4198 * @see java.lang.Character#END_PUNCTUATION
4199 * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION
4200 * @see java.lang.Character#FORMAT
4201 * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4202 * @see java.lang.Character#LETTER_NUMBER
4203 * @see java.lang.Character#LINE_SEPARATOR
4204 * @see java.lang.Character#LOWERCASE_LETTER
4205 * @see java.lang.Character#MATH_SYMBOL
4206 * @see java.lang.Character#MODIFIER_LETTER
4207 * @see java.lang.Character#MODIFIER_SYMBOL
4208 * @see java.lang.Character#NON_SPACING_MARK
4209 * @see java.lang.Character#OTHER_LETTER
4210 * @see java.lang.Character#OTHER_NUMBER
4211 * @see java.lang.Character#OTHER_PUNCTUATION
4212 * @see java.lang.Character#OTHER_SYMBOL
4213 * @see java.lang.Character#PARAGRAPH_SEPARATOR
4214 * @see java.lang.Character#PRIVATE_USE
4215 * @see java.lang.Character#SPACE_SEPARATOR
4216 * @see java.lang.Character#START_PUNCTUATION
4217 * @see java.lang.Character#SURROGATE
4218 * @see java.lang.Character#TITLECASE_LETTER
4219 * @see java.lang.Character#UNASSIGNED
4220 * @see java.lang.Character#UPPERCASE_LETTER
4221 * @since 1.1
4222 */
4223 public static int getType(char ch) {
4224 return getType((int)ch);
4225 }
4226
4227 /**
4228 * Returns a value indicating a character's general category.
4229 *
4230 * @param codePoint the character (Unicode code point) to be tested.
4231 * @return a value of type <code>int</code> representing the
4232 * character's general category.
4233 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4234 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4235 * @see Character#CONTROL CONTROL
4236 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4237 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
4238 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4239 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
4240 * @see Character#END_PUNCTUATION END_PUNCTUATION
4241 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4242 * @see Character#FORMAT FORMAT
4243 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4244 * @see Character#LETTER_NUMBER LETTER_NUMBER
4245 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
4246 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
4247 * @see Character#MATH_SYMBOL MATH_SYMBOL
4248 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
4249 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4250 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
4251 * @see Character#OTHER_LETTER OTHER_LETTER
4252 * @see Character#OTHER_NUMBER OTHER_NUMBER
4253 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4254 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
4255 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4256 * @see Character#PRIVATE_USE PRIVATE_USE
4257 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
4258 * @see Character#START_PUNCTUATION START_PUNCTUATION
4259 * @see Character#SURROGATE SURROGATE
4260 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
4261 * @see Character#UNASSIGNED UNASSIGNED
4262 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
4263 * @since 1.5
4264 */
4265 public static int getType(int codePoint) {
4266 return CharacterData.of(codePoint).getType(codePoint);
4267 }
4268
4269 /**
4270 * Determines the character representation for a specific digit in
4271 * the specified radix. If the value of <code>radix</code> is not a
4272 * valid radix, or the value of <code>digit</code> is not a valid
4273 * digit in the specified radix, the null character
4274 * (<code>'\u0000'</code>) is returned.
4275 * <p>
4276 * The <code>radix</code> argument is valid if it is greater than or
4277 * equal to <code>MIN_RADIX</code> and less than or equal to
4278 * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
4279 * <code>0 <=digit < radix</code>.
4280 * <p>
4281 * If the digit is less than 10, then
4282 * <code>'0' + digit</code> is returned. Otherwise, the value
4283 * <code>'a' + digit - 10</code> is returned.
4284 *
4285 * @param digit the number to convert to a character.
4286 * @param radix the radix.
4287 * @return the <code>char</code> representation of the specified digit
4288 * in the specified radix.
4289 * @see java.lang.Character#MIN_RADIX
4290 * @see java.lang.Character#MAX_RADIX
4291 * @see java.lang.Character#digit(char, int)
4292 */
4293 public static char forDigit(int digit, int radix) {
4294 if ((digit >= radix) || (digit < 0)) {
4295 return '\0';
4296 }
4297 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
4298 return '\0';
4299 }
4300 if (digit < 10) {
4301 return (char)('0' + digit);
4302 }
4303 return (char)('a' - 10 + digit);
4304 }
4305
4306 /**
4307 * Returns the Unicode directionality property for the given
4308 * character. Character directionality is used to calculate the
4309 * visual ordering of text. The directionality value of undefined
4310 * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4311 *
4312 * <p><b>Note:</b> This method cannot handle <a
4313 * href="#supplementary"> supplementary characters</a>. To support
4314 * all Unicode characters, including supplementary characters, use
4315 * the {@link #getDirectionality(int)} method.
4316 *
4317 * @param ch <code>char</code> for which the directionality property
4318 * is requested.
4319 * @return the directionality property of the <code>char</code> value.
4320 *
4321 * @see Character#DIRECTIONALITY_UNDEFINED
4322 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4323 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4324 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4325 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4326 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4327 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4328 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4329 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4330 * @see Character#DIRECTIONALITY_NONSPACING_MARK
4331 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4332 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4333 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4334 * @see Character#DIRECTIONALITY_WHITESPACE
4335 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4336 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4337 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4338 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4339 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4340 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4341 * @since 1.4
4342 */
4343 public static byte getDirectionality(char ch) {
4344 return getDirectionality((int)ch);
4345 }
4346
4347 /**
4348 * Returns the Unicode directionality property for the given
4349 * character (Unicode code point). Character directionality is
4350 * used to calculate the visual ordering of text. The
4351 * directionality value of undefined character is {@link
4352 * #DIRECTIONALITY_UNDEFINED}.
4353 *
4354 * @param codePoint the character (Unicode code point) for which
4355 * the directionality property is requested.
4356 * @return the directionality property of the character.
4357 *
4358 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4359 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4360 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4361 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4362 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4363 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4364 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4365 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4366 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4367 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4368 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4369 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4370 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4371 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4372 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4373 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4374 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4375 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4376 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4377 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4378 * @since 1.5
4379 */
4380 public static byte getDirectionality(int codePoint) {
4381 return CharacterData.of(codePoint).getDirectionality(codePoint);
4382 }
4383
4384 /**
4385 * Determines whether the character is mirrored according to the
4386 * Unicode specification. Mirrored characters should have their
4387 * glyphs horizontally mirrored when displayed in text that is
4388 * right-to-left. For example, <code>'\u0028'</code> LEFT
4389 * PARENTHESIS is semantically defined to be an <i>opening
4390 * parenthesis</i>. This will appear as a "(" in text that is
4391 * left-to-right but as a ")" in text that is right-to-left.
4392 *
4393 * <p><b>Note:</b> This method cannot handle <a
4394 * href="#supplementary"> supplementary characters</a>. To support
4395 * all Unicode characters, including supplementary characters, use
4396 * the {@link #isMirrored(int)} method.
4397 *
4398 * @param ch <code>char</code> for which the mirrored property is requested
4399 * @return <code>true</code> if the char is mirrored, <code>false</code>
4400 * if the <code>char</code> is not mirrored or is not defined.
4401 * @since 1.4
4402 */
4403 public static boolean isMirrored(char ch) {
4404 return isMirrored((int)ch);
4405 }
4406
4407 /**
4408 * Determines whether the specified character (Unicode code point)
4409 * is mirrored according to the Unicode specification. Mirrored
4410 * characters should have their glyphs horizontally mirrored when
4411 * displayed in text that is right-to-left. For example,
4412 * <code>'\u0028'</code> LEFT PARENTHESIS is semantically
4413 * defined to be an <i>opening parenthesis</i>. This will appear
4414 * as a "(" in text that is left-to-right but as a ")" in text
4415 * that is right-to-left.
4416 *
4417 * @param codePoint the character (Unicode code point) to be tested.
4418 * @return <code>true</code> if the character is mirrored, <code>false</code>
4419 * if the character is not mirrored or is not defined.
4420 * @since 1.5
4421 */
4422 public static boolean isMirrored(int codePoint) {
4423 return CharacterData.of(codePoint).isMirrored(codePoint);
4424 }
4425
4426 /**
4427 * Compares two <code>Character</code> objects numerically.
4428 *
4429 * @param anotherCharacter the <code>Character</code> to be compared.
4430
4431 * @return the value <code>0</code> if the argument <code>Character</code>
4432 * is equal to this <code>Character</code>; a value less than
4433 * <code>0</code> if this <code>Character</code> is numerically less
4434 * than the <code>Character</code> argument; and a value greater than
4435 * <code>0</code> if this <code>Character</code> is numerically greater
4436 * than the <code>Character</code> argument (unsigned comparison).
4437 * Note that this is strictly a numerical comparison; it is not
4438 * locale-dependent.
4439 * @since 1.2
4440 */
4441 public int compareTo(Character anotherCharacter) {
4442 return this.value - anotherCharacter.value;
4443 }
4444
4445 /**
4446 * Converts the character (Unicode code point) argument to uppercase using
4447 * information from the UnicodeData file.
4448 * <p>
4449 *
4450 * @param codePoint the character (Unicode code point) to be converted.
4451 * @return either the uppercase equivalent of the character, if
4452 * any, or an error flag (<code>Character.ERROR</code>)
4453 * that indicates that a 1:M <code>char</code> mapping exists.
4454 * @see java.lang.Character#isLowerCase(char)
4455 * @see java.lang.Character#isUpperCase(char)
4456 * @see java.lang.Character#toLowerCase(char)
4457 * @see java.lang.Character#toTitleCase(char)
4458 * @since 1.4
4459 */
4460 static int toUpperCaseEx(int codePoint) {
4461 assert isValidCodePoint(codePoint);
4462 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
4463 }
4464
4465 /**
4466 * Converts the character (Unicode code point) argument to uppercase using case
4467 * mapping information from the SpecialCasing file in the Unicode
4468 * specification. If a character has no explicit uppercase
4469 * mapping, then the <code>char</code> itself is returned in the
4470 * <code>char[]</code>.
4471 *
4472 * @param codePoint the character (Unicode code point) to be converted.
4473 * @return a <code>char[]</code> with the uppercased character.
4474 * @since 1.4
4475 */
4476 static char[] toUpperCaseCharArray(int codePoint) {
4477 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
4478 assert isValidCodePoint(codePoint) &&
4479 !isSupplementaryCodePoint(codePoint);
4480 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
4481 }
4482
/**
 * The number of bits used to represent a <code>char</code> value in
 * unsigned binary form.
 *
 * @since 1.5
 */
public static final int SIZE = 16;
4490
4491 /**
4492 * Returns the value obtained by reversing the order of the bytes in the
4493 * specified <tt>char</tt> value.
4494 *
4495 * @return the value obtained by reversing (or, equivalently, swapping)
4496 * the bytes in the specified <tt>char</tt> value.
4497 * @since 1.5
4498 */
4499 public static char reverseBytes(char ch) {
4500 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
4501 }
4502 }