1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/**
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10
11package android.icu.lang;
12
13import java.lang.ref.SoftReference;
14import java.util.HashMap;
15import java.util.Iterator;
16import java.util.Locale;
17import java.util.Map;
18
19import android.icu.impl.CaseMapImpl;
20import android.icu.impl.IllegalIcuArgumentException;
21import android.icu.impl.Trie2;
22import android.icu.impl.UBiDiProps;
23import android.icu.impl.UCaseProps;
24import android.icu.impl.UCharacterName;
25import android.icu.impl.UCharacterNameChoice;
26import android.icu.impl.UCharacterProperty;
27import android.icu.impl.UCharacterUtility;
28import android.icu.impl.UPropertyAliases;
29import android.icu.lang.UCharacterEnums.ECharacterCategory;
30import android.icu.lang.UCharacterEnums.ECharacterDirection;
31import android.icu.text.BreakIterator;
32import android.icu.text.Edits;
33import android.icu.text.Normalizer2;
34import android.icu.util.RangeValueIterator;
35import android.icu.util.ULocale;
36import android.icu.util.ValueIterator;
37import android.icu.util.VersionInfo;
38
39/**
40 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
41 *
42 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
43 * These extensions provide support for more Unicode properties.
44 * Each ICU release supports the latest version of Unicode available at that time.
45 *
46 * <p>For some time before Java 5 added support for supplementary Unicode code points,
47 * the ICU UCharacter class and many other ICU classes already supported them.
48 * Some UCharacter methods and constants were widened slightly differently than
49 * how the Character class methods and constants were widened later.
50 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
51 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
52 *
53 * <p>Code points are represented in these API using ints. While it would be
54 * more convenient in Java to have a separate primitive datatype for them,
55 * ints suffice in the meantime.
56 *
57 * <p>Aside from the additions for UTF-16 support, and the updated Unicode
58 * properties, the main differences between UCharacter and Character are:
59 * <ul>
60 * <li> UCharacter is not designed to be a char wrapper and does not have
61 *      APIs that involve management of that single char.<br>
62 *      These include:
63 *      <ul>
64 *        <li> char charValue(),
65 *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
66 *      </ul>
67 * <li> UCharacter does not include Character APIs that are deprecated, nor
68 *      does it include the Java-specific character information, such as
69 *      boolean isJavaIdentifierPart(char ch).
70 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
71 *      values '10' - '35'. UCharacter also does this in digit and
72 *      getNumericValue, to adhere to the java semantics of these
73 *      methods.  New methods unicodeDigit, and
74 *      getUnicodeNumericValue do not treat the above code points
75 *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
76 * </ul>
77 * <p>
78 * In addition to Java compatibility functions, which calculate derived properties,
79 * this API provides low-level access to the Unicode Character Database.
80 * <p>
81 * Unicode assigns each code point (not just assigned characters) values for
82 * many properties.
83 * Most of them are simple boolean flags, or constants from a small enumerated list.
84 * For some properties, values are strings or other relatively more complex types.
85 * <p>
86 * For more information see
87 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
88 * (http://www.unicode.org/ucd/)
89 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
90 * User Guide chapter on Properties</a>
91 * (http://www.icu-project.org/userguide/properties.html).
92 * <p>
93 * There are also functions that provide easy migration from C/POSIX functions
94 * like isblank(). Their use is generally discouraged because the C/POSIX
95 * standards do not define their semantics beyond the ASCII range, which means
96 * that different implementations exhibit very different behavior.
97 * Instead, Unicode properties should be used directly.
98 * <p>
99 * There are also only a few, broad C/POSIX character classes, and they tend
100 * to be used for conflicting purposes. For example, the "isalpha()" class
101 * is sometimes used to determine word boundaries, while a more sophisticated
102 * approach would at least distinguish initial letters from continuation
103 * characters (the latter including combining marks).
104 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
105 * Another example: There is no "istitle()" class for titlecase characters.
106 * <p>
107 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
108 * ICU implements them according to the Standard Recommendations in
109 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
110 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
111 * <p>
112 * API access for C/POSIX character classes is as follows:
113 * <pre>{@code
114 * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
115 * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
116 * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
117 * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
118 *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
119 *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
120 * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
121 * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
122 * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
123 * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
124 * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
125 * - cntrl:     getType(c)==CONTROL
126 * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
127 * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
128 * <p>
129 * The C/POSIX character classes are also available in UnicodeSet patterns,
130 * using patterns like [:graph:] or \p{graph}.
131 *
132 * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
133 * Comparison:<ul>
134 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
135 *       most of general categories "Z" (separators) + most whitespace ISO controls
136 *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
137 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
138 * <li> isSpaceChar: just Z (including no-break spaces)</ul>
139 *
140 * <p>
141 * This class is not subclassable.
142 *
143 * @author Syn Wee Quek
144 * @see android.icu.lang.UCharacterEnums
145 */
146
147public final class UCharacter implements ECharacterCategory, ECharacterDirection
148{
149    // public inner classes ----------------------------------------------
150
151    /**
152     * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
153     *
154     * A family of character subsets representing the character blocks in the
155     * Unicode specification, generated from Unicode Data file Blocks.txt.
156     * Character blocks generally define characters used for a specific script
157     * or purpose. A character is contained by at most one Unicode block.
158     *
159     * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
160     */
161    public static final class UnicodeBlock extends Character.Subset
162    {
163        // block id corresponding to icu4c -----------------------------------
164
165        /**
166         */
167        public static final int INVALID_CODE_ID = -1;
168        /**
169         */
170        public static final int BASIC_LATIN_ID = 1;
171        /**
172         */
173        public static final int LATIN_1_SUPPLEMENT_ID = 2;
174        /**
175         */
176        public static final int LATIN_EXTENDED_A_ID = 3;
177        /**
178         */
179        public static final int LATIN_EXTENDED_B_ID = 4;
180        /**
181         */
182        public static final int IPA_EXTENSIONS_ID = 5;
183        /**
184         */
185        public static final int SPACING_MODIFIER_LETTERS_ID = 6;
186        /**
187         */
188        public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
189        /**
190         * Unicode 3.2 renames this block to "Greek and Coptic".
191         */
192        public static final int GREEK_ID = 8;
193        /**
194         */
195        public static final int CYRILLIC_ID = 9;
196        /**
197         */
198        public static final int ARMENIAN_ID = 10;
199        /**
200         */
201        public static final int HEBREW_ID = 11;
202        /**
203         */
204        public static final int ARABIC_ID = 12;
205        /**
206         */
207        public static final int SYRIAC_ID = 13;
208        /**
209         */
210        public static final int THAANA_ID = 14;
211        /**
212         */
213        public static final int DEVANAGARI_ID = 15;
214        /**
215         */
216        public static final int BENGALI_ID = 16;
217        /**
218         */
219        public static final int GURMUKHI_ID = 17;
220        /**
221         */
222        public static final int GUJARATI_ID = 18;
223        /**
224         */
225        public static final int ORIYA_ID = 19;
226        /**
227         */
228        public static final int TAMIL_ID = 20;
229        /**
230         */
231        public static final int TELUGU_ID = 21;
232        /**
233         */
234        public static final int KANNADA_ID = 22;
235        /**
236         */
237        public static final int MALAYALAM_ID = 23;
238        /**
239         */
240        public static final int SINHALA_ID = 24;
241        /**
242         */
243        public static final int THAI_ID = 25;
244        /**
245         */
246        public static final int LAO_ID = 26;
247        /**
248         */
249        public static final int TIBETAN_ID = 27;
250        /**
251         */
252        public static final int MYANMAR_ID = 28;
253        /**
254         */
255        public static final int GEORGIAN_ID = 29;
256        /**
257         */
258        public static final int HANGUL_JAMO_ID = 30;
259        /**
260         */
261        public static final int ETHIOPIC_ID = 31;
262        /**
263         */
264        public static final int CHEROKEE_ID = 32;
265        /**
266         */
267        public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
268        /**
269         */
270        public static final int OGHAM_ID = 34;
271        /**
272         */
273        public static final int RUNIC_ID = 35;
274        /**
275         */
276        public static final int KHMER_ID = 36;
277        /**
278         */
279        public static final int MONGOLIAN_ID = 37;
280        /**
281         */
282        public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
283        /**
284         */
285        public static final int GREEK_EXTENDED_ID = 39;
286        /**
287         */
288        public static final int GENERAL_PUNCTUATION_ID = 40;
289        /**
290         */
291        public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
292        /**
293         */
294        public static final int CURRENCY_SYMBOLS_ID = 42;
295        /**
296         * Unicode 3.2 renames this block to "Combining Diacritical Marks for
297         * Symbols".
298         */
299        public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
300        /**
301         */
302        public static final int LETTERLIKE_SYMBOLS_ID = 44;
303        /**
304         */
305        public static final int NUMBER_FORMS_ID = 45;
306        /**
307         */
308        public static final int ARROWS_ID = 46;
309        /**
310         */
311        public static final int MATHEMATICAL_OPERATORS_ID = 47;
312        /**
313         */
314        public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
315        /**
316         */
317        public static final int CONTROL_PICTURES_ID = 49;
318        /**
319         */
320        public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
321        /**
322         */
323        public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
324        /**
325         */
326        public static final int BOX_DRAWING_ID = 52;
327        /**
328         */
329        public static final int BLOCK_ELEMENTS_ID = 53;
330        /**
331         */
332        public static final int GEOMETRIC_SHAPES_ID = 54;
333        /**
334         */
335        public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
336        /**
337         */
338        public static final int DINGBATS_ID = 56;
339        /**
340         */
341        public static final int BRAILLE_PATTERNS_ID = 57;
342        /**
343         */
344        public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
345        /**
346         */
347        public static final int KANGXI_RADICALS_ID = 59;
348        /**
349         */
350        public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
351        /**
352         */
353        public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
354        /**
355         */
356        public static final int HIRAGANA_ID = 62;
357        /**
358         */
359        public static final int KATAKANA_ID = 63;
360        /**
361         */
362        public static final int BOPOMOFO_ID = 64;
363        /**
364         */
365        public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
366        /**
367         */
368        public static final int KANBUN_ID = 66;
369        /**
370         */
371        public static final int BOPOMOFO_EXTENDED_ID = 67;
372        /**
373         */
374        public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
375        /**
376         */
377        public static final int CJK_COMPATIBILITY_ID = 69;
378        /**
379         */
380        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
381        /**
382         */
383        public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
384        /**
385         */
386        public static final int YI_SYLLABLES_ID = 72;
387        /**
388         */
389        public static final int YI_RADICALS_ID = 73;
390        /**
391         */
392        public static final int HANGUL_SYLLABLES_ID = 74;
393        /**
394         */
395        public static final int HIGH_SURROGATES_ID = 75;
396        /**
397         */
398        public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
399        /**
400         */
401        public static final int LOW_SURROGATES_ID = 77;
402        /**
403         * Same as public static final int PRIVATE_USE_ID.
404         * Until Unicode 3.1.1, the corresponding block name was "Private Use",
405         * and multiple code point ranges had this block.
406         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
407         * and adds separate blocks for the supplementary PUAs.
408         */
409        public static final int PRIVATE_USE_AREA_ID = 78;
410        /**
411         * Same as public static final int PRIVATE_USE_AREA_ID.
412         * Until Unicode 3.1.1, the corresponding block name was "Private Use",
413         * and multiple code point ranges had this block.
414         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
415         * and adds separate blocks for the supplementary PUAs.
416         */
417        public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
418        /**
419         */
420        public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
421        /**
422         */
423        public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
424        /**
425         */
426        public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
427        /**
428         */
429        public static final int COMBINING_HALF_MARKS_ID = 82;
430        /**
431         */
432        public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
433        /**
434         */
435        public static final int SMALL_FORM_VARIANTS_ID = 84;
436        /**
437         */
438        public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
439        /**
440         */
441        public static final int SPECIALS_ID = 86;
442        /**
443         */
444        public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
445        /**
446         */
447        public static final int OLD_ITALIC_ID = 88;
448        /**
449         */
450        public static final int GOTHIC_ID = 89;
451        /**
452         */
453        public static final int DESERET_ID = 90;
454        /**
455         */
456        public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
457        /**
458         */
459        public static final int MUSICAL_SYMBOLS_ID = 92;
460        /**
461         */
462        public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
463        /**
464         */
465        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
466        /**
467         */
468        public static final int
469        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
470        /**
471         */
472        public static final int TAGS_ID = 96;
473
474        // New blocks in Unicode 3.2
475
476        /**
477         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
478         */
479        public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
480        /**
481         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
482         */
483
484        public static final int CYRILLIC_SUPPLEMENT_ID = 97;
485        /**
486         */
487        public static final int TAGALOG_ID = 98;
488        /**
489         */
490        public static final int HANUNOO_ID = 99;
491        /**
492         */
493        public static final int BUHID_ID = 100;
494        /**
495         */
496        public static final int TAGBANWA_ID = 101;
497        /**
498         */
499        public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
500        /**
501         */
502        public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
503        /**
504         */
505        public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
506        /**
507         */
508        public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
509        /**
510         */
511        public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
512        /**
513         */
514        public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
515        /**
516         */
517        public static final int VARIATION_SELECTORS_ID = 108;
518        /**
519         */
520        public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
521        /**
522         */
523        public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
524
525        /**
526         */
527        public static final int LIMBU_ID = 111; /*[1900]*/
528        /**
529         */
530        public static final int TAI_LE_ID = 112; /*[1950]*/
531        /**
532         */
533        public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
534        /**
535         */
536        public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
537        /**
538         */
539        public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
540        /**
541         */
542        public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
543        /**
544         */
545        public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
546        /**
547         */
548        public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
549        /**
550         */
551        public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
552        /**
553         */
554        public static final int UGARITIC_ID = 120; /*[10380]*/
555        /**
556         */
557        public static final int SHAVIAN_ID = 121; /*[10450]*/
558        /**
559         */
560        public static final int OSMANYA_ID = 122; /*[10480]*/
561        /**
562         */
563        public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
564        /**
565         */
566        public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
567        /**
568         */
569        public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
570
571        /* New blocks in Unicode 4.1 */
572
573        /**
574         */
575        public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
576
577        /**
578         */
579        public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
580
581        /**
582         */
583        public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
584
585        /**
586         */
587        public static final int BUGINESE_ID = 129; /*[1A00]*/
588
589        /**
590         */
591        public static final int CJK_STROKES_ID = 130; /*[31C0]*/
592
593        /**
594         */
595        public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
596
597        /**
598         */
599        public static final int COPTIC_ID = 132; /*[2C80]*/
600
601        /**
602         */
603        public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
604
605        /**
606         */
607        public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
608
609        /**
610         */
611        public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
612
613        /**
614         */
615        public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
616
617        /**
618         */
619        public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
620
621        /**
622         */
623        public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
624
625        /**
626         */
627        public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
628
629        /**
630         */
631        public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
632
633        /**
634         */
635        public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
636
637        /**
638         */
639        public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
640
641        /**
642         */
643        public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
644
645        /**
646         */
647        public static final int TIFINAGH_ID = 144; /*[2D30]*/
648
649        /**
650         */
651        public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
652
653        /* New blocks in Unicode 5.0 */
654
655        /**
656         */
657        public static final int NKO_ID = 146; /*[07C0]*/
658        /**
659         */
660        public static final int BALINESE_ID = 147; /*[1B00]*/
661        /**
662         */
663        public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
664        /**
665         */
666        public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
667        /**
668         */
669        public static final int PHAGS_PA_ID = 150; /*[A840]*/
670        /**
671         */
672        public static final int PHOENICIAN_ID = 151; /*[10900]*/
673        /**
674         */
675        public static final int CUNEIFORM_ID = 152; /*[12000]*/
676        /**
677         */
678        public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
679        /**
680         */
681        public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
682
683        /**
684         */
685        public static final int SUNDANESE_ID = 155; /* [1B80] */
686
687        /**
688         */
689        public static final int LEPCHA_ID = 156; /* [1C00] */
690
691        /**
692         */
693        public static final int OL_CHIKI_ID = 157; /* [1C50] */
694
695        /**
696         */
697        public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
698
699        /**
700         */
701        public static final int VAI_ID = 159; /* [A500] */
702
703        /**
704         */
705        public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
706
707        /**
708         */
709        public static final int SAURASHTRA_ID = 161; /* [A880] */
710
711        /**
712         */
713        public static final int KAYAH_LI_ID = 162; /* [A900] */
714
715        /**
716         */
717        public static final int REJANG_ID = 163; /* [A930] */
718
719        /**
720         */
721        public static final int CHAM_ID = 164; /* [AA00] */
722
723        /**
724         */
725        public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
726
727        /**
728         */
729        public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
730
731        /**
732         */
733        public static final int LYCIAN_ID = 167; /* [10280] */
734
735        /**
736         */
737        public static final int CARIAN_ID = 168; /* [102A0] */
738
739        /**
740         */
741        public static final int LYDIAN_ID = 169; /* [10920] */
742
743        /**
744         */
745        public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
746
747        /**
748         */
749        public static final int DOMINO_TILES_ID = 171; /* [1F030] */
750
751        /* New blocks in Unicode 5.2 */
752
753        /***/
754        public static final int SAMARITAN_ID = 172; /*[0800]*/
755        /***/
756        public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
757        /***/
758        public static final int TAI_THAM_ID = 174; /*[1A20]*/
759        /***/
760        public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
761        /***/
762        public static final int LISU_ID = 176; /*[A4D0]*/
763        /***/
764        public static final int BAMUM_ID = 177; /*[A6A0]*/
765        /***/
766        public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
767        /***/
768        public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
769        /***/
770        public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
771        /***/
772        public static final int JAVANESE_ID = 181; /*[A980]*/
773        /***/
774        public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
775        /***/
776        public static final int TAI_VIET_ID = 183; /*[AA80]*/
777        /***/
778        public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
779        /***/
780        public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
781        /***/
782        public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
783        /***/
784        public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
785        /***/
786        public static final int AVESTAN_ID = 188; /*[10B00]*/
787        /***/
788        public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
789        /***/
790        public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
791        /***/
792        public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
793        /***/
794        public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
795        /***/
796        public static final int KAITHI_ID = 193; /*[11080]*/
797        /***/
798        public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
799        /***/
800        public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
801        /***/
802        public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
803        /***/
804        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
805
806        /* New blocks in Unicode 6.0 */
807
808        /***/
809        public static final int MANDAIC_ID = 198; /*[0840]*/
810        /***/
811        public static final int BATAK_ID = 199; /*[1BC0]*/
812        /***/
813        public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
814        /***/
815        public static final int BRAHMI_ID = 201; /*[11000]*/
816        /***/
817        public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
818        /***/
819        public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
820        /***/
821        public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
822        /***/
823        public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
824        /***/
825        public static final int EMOTICONS_ID = 206; /*[1F600]*/
826        /***/
827        public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
828        /***/
829        public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
830        /***/
831        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
832
833        /* New blocks in Unicode 6.1 */
834
835        /***/
836        public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
837        /***/
838        public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
839        /***/
840        public static final int CHAKMA_ID = 212; /*[11100]*/
841        /***/
842        public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
843        /***/
844        public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
845        /***/
846        public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
847        /***/
848        public static final int MIAO_ID = 216; /*[16F00]*/
849        /***/
850        public static final int SHARADA_ID = 217; /*[11180]*/
851        /***/
852        public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
853        /***/
854        public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
855        /***/
856        public static final int TAKRI_ID = 220; /*[11680]*/
857
858        /* New blocks in Unicode 7.0 */
859
860        /***/
861        public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
862        /***/
863        public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
864        /***/
865        public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
866        /***/
867        public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
868        /***/
869        public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
870        /***/
871        public static final int ELBASAN_ID = 226; /*[10500]*/
872        /***/
873        public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
874        /***/
875        public static final int GRANTHA_ID = 228; /*[11300]*/
876        /***/
877        public static final int KHOJKI_ID = 229; /*[11200]*/
878        /***/
879        public static final int KHUDAWADI_ID = 230; /*[112B0]*/
880        /***/
881        public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
882        /***/
883        public static final int LINEAR_A_ID = 232; /*[10600]*/
884        /***/
885        public static final int MAHAJANI_ID = 233; /*[11150]*/
886        /***/
887        public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
888        /***/
889        public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
890        /***/
891        public static final int MODI_ID = 236; /*[11600]*/
892        /***/
893        public static final int MRO_ID = 237; /*[16A40]*/
894        /***/
895        public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
896        /***/
897        public static final int NABATAEAN_ID = 239; /*[10880]*/
898        /***/
899        public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
900        /***/
901        public static final int OLD_PERMIC_ID = 241; /*[10350]*/
902        /***/
903        public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
904        /***/
905        public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
906        /***/
907        public static final int PALMYRENE_ID = 244; /*[10860]*/
908        /***/
909        public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
910        /***/
911        public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
912        /***/
913        public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
914        /***/
915        public static final int SIDDHAM_ID = 248; /*[11580]*/
916        /***/
917        public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
918        /***/
919        public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
920        /***/
921        public static final int TIRHUTA_ID = 251; /*[11480]*/
922        /***/
923        public static final int WARANG_CITI_ID = 252; /*[118A0]*/
924
925        /* New blocks in Unicode 8.0 */
926
927        /***/
928        public static final int AHOM_ID = 253; /*[11700]*/
929        /***/
930        public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
931        /***/
932        public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
933        /***/
934        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
935        /***/
936        public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
937        /***/
938        public static final int HATRAN_ID = 258; /*[108E0]*/
939        /***/
940        public static final int MULTANI_ID = 259; /*[11280]*/
941        /***/
942        public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
943        /***/
944        public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
945        /***/
946        public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
947
948        /* New blocks in Unicode 9.0 */
949
950        /***/
951        public static final int ADLAM_ID = 263; /*[1E900]*/
952        /***/
953        public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
954        /***/
955        public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
956        /***/
957        public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
958        /***/
959        public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
960        /***/
961        public static final int MARCHEN_ID = 268; /*[11C70]*/
962        /***/
963        public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
964        /***/
965        public static final int NEWA_ID = 270; /*[11400]*/
966        /***/
967        public static final int OSAGE_ID = 271; /*[104B0]*/
968        /***/
969        public static final int TANGUT_ID = 272; /*[17000]*/
970        /***/
971        public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
972
973        /**
974         * One more than the highest normal UnicodeBlock value.
975         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
976         *
977         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
978         * @hide unsupported on Android
979         */
980        @Deprecated
981        public static final int COUNT = 274;
982
983        // blocks objects ---------------------------------------------------
984
985        /**
986         * Array of UnicodeBlocks, for easy access in getInstance(int)
987         */
988        private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
989
990        /**
991         */
992        public static final UnicodeBlock NO_BLOCK
993        = new UnicodeBlock("NO_BLOCK", 0);
994
995        /**
996         */
997        public static final UnicodeBlock BASIC_LATIN
998        = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
999        /**
1000         */
1001        public static final UnicodeBlock LATIN_1_SUPPLEMENT
1002        = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1003        /**
1004         */
1005        public static final UnicodeBlock LATIN_EXTENDED_A
1006        = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1007        /**
1008         */
1009        public static final UnicodeBlock LATIN_EXTENDED_B
1010        = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1011        /**
1012         */
1013        public static final UnicodeBlock IPA_EXTENSIONS
1014        = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1015        /**
1016         */
1017        public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1018        = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1019        /**
1020         */
1021        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1022        = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1023        /**
1024         * Unicode 3.2 renames this block to "Greek and Coptic".
1025         */
1026        public static final UnicodeBlock GREEK
1027        = new UnicodeBlock("GREEK", GREEK_ID);
1028        /**
1029         */
1030        public static final UnicodeBlock CYRILLIC
1031        = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1032        /**
1033         */
1034        public static final UnicodeBlock ARMENIAN
1035        = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1036        /**
1037         */
1038        public static final UnicodeBlock HEBREW
1039        = new UnicodeBlock("HEBREW", HEBREW_ID);
1040        /**
1041         */
1042        public static final UnicodeBlock ARABIC
1043        = new UnicodeBlock("ARABIC", ARABIC_ID);
1044        /**
1045         */
1046        public static final UnicodeBlock SYRIAC
1047        = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1048        /**
1049         */
1050        public static final UnicodeBlock THAANA
1051        = new UnicodeBlock("THAANA", THAANA_ID);
1052        /**
1053         */
1054        public static final UnicodeBlock DEVANAGARI
1055        = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1056        /**
1057         */
1058        public static final UnicodeBlock BENGALI
1059        = new UnicodeBlock("BENGALI", BENGALI_ID);
1060        /**
1061         */
1062        public static final UnicodeBlock GURMUKHI
1063        = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1064        /**
1065         */
1066        public static final UnicodeBlock GUJARATI
1067        = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1068        /**
1069         */
1070        public static final UnicodeBlock ORIYA
1071        = new UnicodeBlock("ORIYA", ORIYA_ID);
1072        /**
1073         */
1074        public static final UnicodeBlock TAMIL
1075        = new UnicodeBlock("TAMIL", TAMIL_ID);
1076        /**
1077         */
1078        public static final UnicodeBlock TELUGU
1079        = new UnicodeBlock("TELUGU", TELUGU_ID);
1080        /**
1081         */
1082        public static final UnicodeBlock KANNADA
1083        = new UnicodeBlock("KANNADA", KANNADA_ID);
1084        /**
1085         */
1086        public static final UnicodeBlock MALAYALAM
1087        = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1088        /**
1089         */
1090        public static final UnicodeBlock SINHALA
1091        = new UnicodeBlock("SINHALA", SINHALA_ID);
1092        /**
1093         */
1094        public static final UnicodeBlock THAI
1095        = new UnicodeBlock("THAI", THAI_ID);
1096        /**
1097         */
1098        public static final UnicodeBlock LAO
1099        = new UnicodeBlock("LAO", LAO_ID);
1100        /**
1101         */
1102        public static final UnicodeBlock TIBETAN
1103        = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1104        /**
1105         */
1106        public static final UnicodeBlock MYANMAR
1107        = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1108        /**
1109         */
1110        public static final UnicodeBlock GEORGIAN
1111        = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1112        /**
1113         */
1114        public static final UnicodeBlock HANGUL_JAMO
1115        = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1116        /**
1117         */
1118        public static final UnicodeBlock ETHIOPIC
1119        = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1120        /**
1121         */
1122        public static final UnicodeBlock CHEROKEE
1123        = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1124        /**
1125         */
1126        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1127        = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1128                UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1129        /**
1130         */
1131        public static final UnicodeBlock OGHAM
1132        = new UnicodeBlock("OGHAM", OGHAM_ID);
1133        /**
1134         */
1135        public static final UnicodeBlock RUNIC
1136        = new UnicodeBlock("RUNIC", RUNIC_ID);
1137        /**
1138         */
1139        public static final UnicodeBlock KHMER
1140        = new UnicodeBlock("KHMER", KHMER_ID);
1141        /**
1142         */
1143        public static final UnicodeBlock MONGOLIAN
1144        = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1145        /**
1146         */
1147        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1148        = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1149        /**
1150         */
1151        public static final UnicodeBlock GREEK_EXTENDED
1152        = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1153        /**
1154         */
1155        public static final UnicodeBlock GENERAL_PUNCTUATION
1156        = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1157        /**
1158         */
1159        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1160        = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1161        /**
1162         */
1163        public static final UnicodeBlock CURRENCY_SYMBOLS
1164        = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1165        /**
1166         * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1167         * Symbols".
1168         */
1169        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1170        = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1171        /**
1172         */
1173        public static final UnicodeBlock LETTERLIKE_SYMBOLS
1174        = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1175        /**
1176         */
1177        public static final UnicodeBlock NUMBER_FORMS
1178        = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1179        /**
1180         */
1181        public static final UnicodeBlock ARROWS
1182        = new UnicodeBlock("ARROWS", ARROWS_ID);
1183        /**
1184         */
1185        public static final UnicodeBlock MATHEMATICAL_OPERATORS
1186        = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1187        /**
1188         */
1189        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1190        = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1191        /**
1192         */
1193        public static final UnicodeBlock CONTROL_PICTURES
1194        = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1195        /**
1196         */
1197        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1198        = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1199        /**
1200         */
1201        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1202        = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1203        /**
1204         */
1205        public static final UnicodeBlock BOX_DRAWING
1206        = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1207        /**
1208         */
1209        public static final UnicodeBlock BLOCK_ELEMENTS
1210        = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1211        /**
1212         */
1213        public static final UnicodeBlock GEOMETRIC_SHAPES
1214        = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1215        /**
1216         */
1217        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1218        = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1219        /**
1220         */
1221        public static final UnicodeBlock DINGBATS
1222        = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1223        /**
1224         */
1225        public static final UnicodeBlock BRAILLE_PATTERNS
1226        = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1227        /**
1228         */
1229        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1230        = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1231        /**
1232         */
1233        public static final UnicodeBlock KANGXI_RADICALS
1234        = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1235        /**
1236         */
1237        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1238        = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1239                IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1240        /**
1241         */
1242        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1243        = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1244        /**
1245         */
1246        public static final UnicodeBlock HIRAGANA
1247        = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1248        /**
1249         */
1250        public static final UnicodeBlock KATAKANA
1251        = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1252        /**
1253         */
1254        public static final UnicodeBlock BOPOMOFO
1255        = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1256        /**
1257         */
1258        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1259        = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1260        /**
1261         */
1262        public static final UnicodeBlock KANBUN
1263        = new UnicodeBlock("KANBUN", KANBUN_ID);
1264        /**
1265         */
1266        public static final UnicodeBlock BOPOMOFO_EXTENDED
1267        = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1268        /**
1269         */
1270        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1271        = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1272                ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1273        /**
1274         */
1275        public static final UnicodeBlock CJK_COMPATIBILITY
1276        = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1277        /**
1278         */
1279        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1280        = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1281                CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1282        /**
1283         */
1284        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1285        = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1286        /**
1287         */
1288        public static final UnicodeBlock YI_SYLLABLES
1289        = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1290        /**
1291         */
1292        public static final UnicodeBlock YI_RADICALS
1293        = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1294        /**
1295         */
1296        public static final UnicodeBlock HANGUL_SYLLABLES
1297        = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1298        /**
1299         */
1300        public static final UnicodeBlock HIGH_SURROGATES
1301        = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1302        /**
1303         */
1304        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1305        = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1306        /**
1307         */
1308        public static final UnicodeBlock LOW_SURROGATES
1309        = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1310        /**
1311         * Same as public static final int PRIVATE_USE.
1312         * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1313         * and multiple code point ranges had this block.
1314         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1315         * and adds separate blocks for the supplementary PUAs.
1316         */
1317        public static final UnicodeBlock PRIVATE_USE_AREA
1318        = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1319        /**
1320         * Same as public static final int PRIVATE_USE_AREA.
1321         * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1322         * and multiple code point ranges had this block.
1323         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1324         * and adds separate blocks for the supplementary PUAs.
1325         */
1326        public static final UnicodeBlock PRIVATE_USE
1327        = PRIVATE_USE_AREA;
1328        /**
1329         */
1330        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1331        = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1332        /**
1333         */
1334        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1335        = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1336        /**
1337         */
1338        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1339        = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1340        /**
1341         */
1342        public static final UnicodeBlock COMBINING_HALF_MARKS
1343        = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1344        /**
1345         */
1346        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1347        = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1348        /**
1349         */
1350        public static final UnicodeBlock SMALL_FORM_VARIANTS
1351        = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1352        /**
1353         */
1354        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1355        = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1356        /**
1357         */
1358        public static final UnicodeBlock SPECIALS
1359        = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1360        /**
1361         */
1362        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1363        = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1364        /**
1365         */
1366        public static final UnicodeBlock OLD_ITALIC
1367        = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1368        /**
1369         */
1370        public static final UnicodeBlock GOTHIC
1371        = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1372        /**
1373         */
1374        public static final UnicodeBlock DESERET
1375        = new UnicodeBlock("DESERET", DESERET_ID);
1376        /**
1377         */
1378        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1379        = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1380        /**
1381         */
1382        public static final UnicodeBlock MUSICAL_SYMBOLS
1383        = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1384        /**
1385         */
1386        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1387        = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1388                MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1389        /**
1390         */
1391        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1392        = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1393                CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1394        /**
1395         */
1396        public static final UnicodeBlock
1397        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1398        = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1399                CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1400        /**
1401         */
1402        public static final UnicodeBlock TAGS
1403        = new UnicodeBlock("TAGS", TAGS_ID);
1404
1405        // New blocks in Unicode 3.2
1406
1407        /**
1408         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1409         */
1410        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1411        = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1412        /**
1413         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1414         */
1415        public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1416        = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1417        /**
1418         */
1419        public static final UnicodeBlock TAGALOG
1420        = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1421        /**
1422         */
1423        public static final UnicodeBlock HANUNOO
1424        = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1425        /**
1426         */
1427        public static final UnicodeBlock BUHID
1428        = new UnicodeBlock("BUHID", BUHID_ID);
1429        /**
1430         */
1431        public static final UnicodeBlock TAGBANWA
1432        = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1433        /**
1434         */
1435        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1436        = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1437                MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1438        /**
1439         */
1440        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1441        = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1442        /**
1443         */
1444        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1445        = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1446        /**
1447         */
1448        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1449        = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1450                MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1451        /**
1452         */
1453        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1454        = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1455                SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1456        /**
1457         */
1458        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1459        = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1460        /**
1461         */
1462        public static final UnicodeBlock VARIATION_SELECTORS
1463        = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1464        /**
1465         */
1466        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1467        = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1468                SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1469        /**
1470         */
1471        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1472        = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1473                SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1474
1475        /**
1476         */
1477        public static final UnicodeBlock LIMBU
1478        = new UnicodeBlock("LIMBU", LIMBU_ID);
1479        /**
1480         */
1481        public static final UnicodeBlock TAI_LE
1482        = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1483        /**
1484         */
1485        public static final UnicodeBlock KHMER_SYMBOLS
1486        = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1487
1488        /**
1489         */
1490        public static final UnicodeBlock PHONETIC_EXTENSIONS
1491        = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1492
1493        /**
1494         */
1495        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1496        = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1497                MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1498        /**
1499         */
1500        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1501        = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1502        /**
1503         */
1504        public static final UnicodeBlock LINEAR_B_SYLLABARY
1505        = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1506        /**
1507         */
1508        public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1509        = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1510        /**
1511         */
1512        public static final UnicodeBlock AEGEAN_NUMBERS
1513        = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1514        /**
1515         */
1516        public static final UnicodeBlock UGARITIC
1517        = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1518        /**
1519         */
1520        public static final UnicodeBlock SHAVIAN
1521        = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1522        /**
1523         */
1524        public static final UnicodeBlock OSMANYA
1525        = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1526        /**
1527         */
1528        public static final UnicodeBlock CYPRIOT_SYLLABARY
1529        = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1530        /**
1531         */
1532        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1533        = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1534
1535        /**
1536         */
1537        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1538        = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1539
1540        /* New blocks in Unicode 4.1 */
1541
1542        /**
1543         */
1544        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1545                new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1546                        ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1547
1548        /**
1549         */
1550        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1551                new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1552
1553        /**
1554         */
1555        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1556                new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1557
1558        /**
1559         */
1560        public static final UnicodeBlock BUGINESE =
1561                new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1562
1563        /**
1564         */
1565        public static final UnicodeBlock CJK_STROKES =
1566                new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1567
1568        /**
1569         */
1570        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1571                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1572                        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1573
1574        /**
1575         */
1576        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1577
1578        /**
1579         */
1580        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1581                new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1582
1583        /**
1584         */
1585        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1586                new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1587
1588        /**
1589         */
1590        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1591                new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1592
1593        /**
1594         */
1595        public static final UnicodeBlock GLAGOLITIC =
1596                new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1597
1598        /**
1599         */
1600        public static final UnicodeBlock KHAROSHTHI =
1601                new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1602
1603        /**
1604         */
        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
                new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/

        // Every constant in this run is created by the private
        // UnicodeBlock(String, int) constructor, which receives the constant's
        // name and its matching *_ID value.
        // NOTE(review): the bracketed hex number trailing each declaration looks
        // like the block's first code point; it is a comment only and nothing in
        // the code reads it - confirm against the UCD before relying on it.
        /**
         */
        public static final UnicodeBlock NEW_TAI_LUE =
                new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/

        /**
         */
        public static final UnicodeBlock OLD_PERSIAN =
                new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/

        /**
         */
        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
                new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
                        PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/

        /**
         */
        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
                new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/

        /**
         */
        public static final UnicodeBlock SYLOTI_NAGRI =
                new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/

        /**
         */
        public static final UnicodeBlock TIFINAGH =
                new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/

        /**
         */
        public static final UnicodeBlock VERTICAL_FORMS =
                new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/

        /**
         */
        public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
        /**
         */
        public static final UnicodeBlock BALINESE =
                new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
        /**
         */
        public static final UnicodeBlock LATIN_EXTENDED_C =
                new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
        /**
         */
        public static final UnicodeBlock LATIN_EXTENDED_D =
                new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
        /**
         */
        public static final UnicodeBlock PHAGS_PA =
                new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
        /**
         */
        public static final UnicodeBlock PHOENICIAN =
                new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
        /**
         */
        public static final UnicodeBlock CUNEIFORM =
                new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
        /**
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
                new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                        CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
        /**
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
                new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/

        /**
         */
        public static final UnicodeBlock SUNDANESE =
                new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */

        /**
         */
        public static final UnicodeBlock LEPCHA =
                new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */

        /**
         */
        public static final UnicodeBlock OL_CHIKI =
                new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */

        /**
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
                new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */

        /**
         */
        public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */

        /**
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
                new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */

        /**
         */
        public static final UnicodeBlock SAURASHTRA =
                new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */

        /**
         */
        public static final UnicodeBlock KAYAH_LI =
                new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */

        /**
         */
        public static final UnicodeBlock REJANG =
                new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */

        /**
         */
        public static final UnicodeBlock CHAM =
                new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */

        /**
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
                new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */

        /**
         */
        public static final UnicodeBlock PHAISTOS_DISC =
                new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */

        /**
         */
        public static final UnicodeBlock LYCIAN =
                new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */

        /**
         */
        public static final UnicodeBlock CARIAN =
                new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */

        /**
         */
        public static final UnicodeBlock LYDIAN =
                new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */

        /**
         */
        public static final UnicodeBlock MAHJONG_TILES =
                new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */

        /**
         */
        public static final UnicodeBlock DOMINO_TILES =
                new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1764
        /*
         * New blocks in Unicode 5.2.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock SAMARITAN =
                new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
        /***/
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
                new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
                        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
        /***/
        public static final UnicodeBlock TAI_THAM =
                new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
        /***/
        public static final UnicodeBlock VEDIC_EXTENSIONS =
                new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
        /***/
        public static final UnicodeBlock LISU =
                new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
        /***/
        public static final UnicodeBlock BAMUM =
                new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
        /***/
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
                new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
        /***/
        public static final UnicodeBlock DEVANAGARI_EXTENDED =
                new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
        /***/
        public static final UnicodeBlock JAVANESE =
                new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
                new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
        /***/
        public static final UnicodeBlock TAI_VIET =
                new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK =
                new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
        /***/
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
                new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
        /***/
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
                new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
        /***/
        public static final UnicodeBlock AVESTAN =
                new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
                new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
                new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
        /***/
        public static final UnicodeBlock OLD_TURKIC =
                new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
        /***/
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
                new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
        /***/
        public static final UnicodeBlock KAITHI =
                new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
        /***/
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
        /***/
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                        ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
        /***/
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                        ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
1849
        /*
         * New blocks in Unicode 6.0.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock MANDAIC =
                new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
        /***/
        public static final UnicodeBlock BATAK =
                new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
        /***/
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
                new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
        /***/
        public static final UnicodeBlock BRAHMI =
                new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
        /***/
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
                new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
        /***/
        public static final UnicodeBlock KANA_SUPPLEMENT =
                new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
        /***/
        public static final UnicodeBlock PLAYING_CARDS =
                new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
        /***/
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                        MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
        /***/
        public static final UnicodeBlock EMOTICONS =
                new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
        /***/
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
                new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
        /***/
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
                new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
1890
        /*
         * New blocks in Unicode 6.1.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock ARABIC_EXTENDED_A =
                new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
        /***/
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
                new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
        /***/
        public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
                new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
        /***/
        public static final UnicodeBlock MEROITIC_CURSIVE =
                new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
        /***/
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
                new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
        /***/
        public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
        /***/
        public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
        /***/
        public static final UnicodeBlock SORA_SOMPENG =
                new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
        /***/
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
                new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
        /***/
        public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
1922
        /*
         * New blocks in Unicode 7.0.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
        /***/
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
                new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
        /***/
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
                new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
        /***/
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
        /***/
        public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
        /***/
        public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
        /***/
        public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
                new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
        /***/
        public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
        /***/
        public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
        /***/
        public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
        /***/
        public static final UnicodeBlock LATIN_EXTENDED_E =
                new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
        /***/
        public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
        /***/
        public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
        /***/
        public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
        /***/
        public static final UnicodeBlock MENDE_KIKAKUI =
                new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
        /***/
        public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
        /***/
        public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_B =
                new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
        /***/
        public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
        /***/
        public static final UnicodeBlock OLD_NORTH_ARABIAN =
                new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
        /***/
        public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
        /***/
        public static final UnicodeBlock ORNAMENTAL_DINGBATS =
                new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
        /***/
        public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
        /***/
        public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
        /***/
        public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
        /***/
        public static final UnicodeBlock PSALTER_PAHLAVI =
                new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
        /***/
        public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
                new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
        /***/
        public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
        /***/
        public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
                new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
                new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
        /***/
        public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
        /***/
        public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2002
        /*
         * New blocks in Unicode 8.0.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
        /***/
        public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
                new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
        /***/
        public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
                new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
        /***/
        public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
                new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
        /***/
        public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
        /***/
        public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
        /***/
        public static final UnicodeBlock OLD_HUNGARIAN =
                new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
        /***/
        public static final UnicodeBlock SUTTON_SIGNWRITING =
                new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2034
        /*
         * New blocks in Unicode 9.0.
         * Every constant is created by the private UnicodeBlock(String, int)
         * constructor, which receives the constant's name and its *_ID value.
         * NOTE(review): the trailing bracketed hex number looks like the block's
         * first code point; nothing in the code reads it - confirm against the UCD.
         */

        /***/
        public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
        /***/
        public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
        /***/
        public static final UnicodeBlock CYRILLIC_EXTENDED_C =
                new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
        /***/
        public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
                new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
        /***/
        public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
                new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
        /***/
        public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
        /***/
        public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
                new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
        /***/
        public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
        /***/
        public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
        /***/
        public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
        /***/
        public static final UnicodeBlock TANGUT_COMPONENTS =
                new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2064
2065        /**
2066         */
2067        public static final UnicodeBlock INVALID_CODE
2068        = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2069
2070        static {
2071            for (int blockId = 0; blockId < COUNT; ++blockId) {
2072                if (BLOCKS_[blockId] == null) {
2073                    throw new java.lang.IllegalStateException(
2074                            "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2075                }
2076            }
2077        }
2078
2079        // public methods --------------------------------------------------
2080
2081        /**
2082         * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
2083         * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2084         * @param id UnicodeBlock ID
2085         * @return the only instance of the UnicodeBlock with the argument ID
2086         *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2087         *         returned.
2088         */
2089        public static UnicodeBlock getInstance(int id)
2090        {
2091            if (id >= 0 && id < BLOCKS_.length) {
2092                return BLOCKS_[id];
2093            }
2094            return INVALID_CODE;
2095        }
2096
2097        /**
2098         * Returns the Unicode allocation block that contains the code point,
2099         * or null if the code point is not a member of a defined block.
2100         * @param ch code point to be tested
2101         * @return the Unicode allocation block that contains the code point
2102         */
2103        public static UnicodeBlock of(int ch)
2104        {
2105            if (ch > MAX_VALUE) {
2106                return INVALID_CODE;
2107            }
2108
2109            return UnicodeBlock.getInstance(
2110                    UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2111        }
2112
2113        /**
2114         * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2115         * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
2116         * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2117         * against the official UCD name and the Java block name
2118         * (ignoring case).
2119         * @param blockName the name of the block to match
2120         * @return the UnicodeBlock with that name
2121         * @throws IllegalArgumentException if the blockName could not be matched
2122         */
2123        public static final UnicodeBlock forName(String blockName) {
2124            Map<String, UnicodeBlock> m = null;
2125            if (mref != null) {
2126                m = mref.get();
2127            }
2128            if (m == null) {
2129                m = new HashMap<String, UnicodeBlock>(BLOCKS_.length);
2130                for (int i = 0; i < BLOCKS_.length; ++i) {
2131                    UnicodeBlock b = BLOCKS_[i];
2132                    String name = trimBlockName(
2133                            getPropertyValueName(UProperty.BLOCK, b.getID(),
2134                                    UProperty.NameChoice.LONG));
2135                    m.put(name, b);
2136                }
2137                mref = new SoftReference<Map<String, UnicodeBlock>>(m);
2138            }
2139            UnicodeBlock b = m.get(trimBlockName(blockName));
2140            if (b == null) {
2141                throw new IllegalArgumentException();
2142            }
2143            return b;
2144        }
2145        private static SoftReference<Map<String, UnicodeBlock>> mref;
2146
2147        private static String trimBlockName(String name) {
2148            String upper = name.toUpperCase(Locale.ENGLISH);
2149            StringBuilder result = new StringBuilder(upper.length());
2150            for (int i = 0; i < upper.length(); i++) {
2151                char c = upper.charAt(i);
2152                if (c != ' ' && c != '_' && c != '-') {
2153                    result.append(c);
2154                }
2155            }
2156            return result.toString();
2157        }
2158
2159        /**
2160         * {icu} Returns the type ID of this Unicode block
2161         * @return integer type ID of this Unicode block
2162         */
2163        public int getID()
2164        {
2165            return m_id_;
2166        }
2167
2168        // private data members ---------------------------------------------
2169
2170        /**
2171         * Identification code for this UnicodeBlock
2172         */
2173        private int m_id_;
2174
2175        // private constructor ----------------------------------------------
2176
2177        /**
2178         * UnicodeBlock constructor
2179         * @param name name of this UnicodeBlock
2180         * @param id unique id of this UnicodeBlock
2181         * @exception NullPointerException if name is <code>null</code>
2182         */
2183        private UnicodeBlock(String name, int id)
2184        {
2185            super(name);
2186            m_id_ = id;
2187            if (id >= 0) {
2188                BLOCKS_[id] = this;
2189            }
2190        }
2191    }
2192
2193    /**
2194     * East Asian Width constants.
2195     * @see UProperty#EAST_ASIAN_WIDTH
2196     * @see UCharacter#getIntPropertyValue
2197     */
2198    public static interface EastAsianWidth
2199    {
2200        /**
2201         */
2202        public static final int NEUTRAL = 0;
2203        /**
2204         */
2205        public static final int AMBIGUOUS = 1;
2206        /**
2207         */
2208        public static final int HALFWIDTH = 2;
2209        /**
2210         */
2211        public static final int FULLWIDTH = 3;
2212        /**
2213         */
2214        public static final int NARROW = 4;
2215        /**
2216         */
2217        public static final int WIDE = 5;
2218        /**
2219         * One more than the highest normal EastAsianWidth value.
2220         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2221         *
2222         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2223         * @hide unsupported on Android
2224         */
2225        @Deprecated
2226        public static final int COUNT = 6;
2227    }
2228
2229    /**
2230     * Decomposition Type constants.
2231     * @see UProperty#DECOMPOSITION_TYPE
2232     */
2233    public static interface DecompositionType
2234    {
2235        /**
2236         */
2237        public static final int NONE = 0;
2238        /**
2239         */
2240        public static final int CANONICAL = 1;
2241        /**
2242         */
2243        public static final int COMPAT = 2;
2244        /**
2245         */
2246        public static final int CIRCLE = 3;
2247        /**
2248         */
2249        public static final int FINAL = 4;
2250        /**
2251         */
2252        public static final int FONT = 5;
2253        /**
2254         */
2255        public static final int FRACTION = 6;
2256        /**
2257         */
2258        public static final int INITIAL = 7;
2259        /**
2260         */
2261        public static final int ISOLATED = 8;
2262        /**
2263         */
2264        public static final int MEDIAL = 9;
2265        /**
2266         */
2267        public static final int NARROW = 10;
2268        /**
2269         */
2270        public static final int NOBREAK = 11;
2271        /**
2272         */
2273        public static final int SMALL = 12;
2274        /**
2275         */
2276        public static final int SQUARE = 13;
2277        /**
2278         */
2279        public static final int SUB = 14;
2280        /**
2281         */
2282        public static final int SUPER = 15;
2283        /**
2284         */
2285        public static final int VERTICAL = 16;
2286        /**
2287         */
2288        public static final int WIDE = 17;
2289        /**
2290         * One more than the highest normal DecompositionType value.
2291         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
2292         *
2293         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2294         * @hide unsupported on Android
2295         */
2296        @Deprecated
2297        public static final int COUNT = 18;
2298    }
2299
2300    /**
2301     * Joining Type constants.
2302     * @see UProperty#JOINING_TYPE
2303     */
2304    public static interface JoiningType
2305    {
2306        /**
2307         */
2308        public static final int NON_JOINING = 0;
2309        /**
2310         */
2311        public static final int JOIN_CAUSING = 1;
2312        /**
2313         */
2314        public static final int DUAL_JOINING = 2;
2315        /**
2316         */
2317        public static final int LEFT_JOINING = 3;
2318        /**
2319         */
2320        public static final int RIGHT_JOINING = 4;
2321        /**
2322         */
2323        public static final int TRANSPARENT = 5;
2324        /**
2325         * One more than the highest normal JoiningType value.
2326         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
2327         *
2328         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2329         * @hide unsupported on Android
2330         */
2331        @Deprecated
2332        public static final int COUNT = 6;
2333    }
2334
2335    /**
2336     * Joining Group constants.
2337     * @see UProperty#JOINING_GROUP
2338     */
2339    public static interface JoiningGroup
2340    {
2341        /**
2342         */
2343        public static final int NO_JOINING_GROUP = 0;
2344        /**
2345         */
2346        public static final int AIN = 1;
2347        /**
2348         */
2349        public static final int ALAPH = 2;
2350        /**
2351         */
2352        public static final int ALEF = 3;
2353        /**
2354         */
2355        public static final int BEH = 4;
2356        /**
2357         */
2358        public static final int BETH = 5;
2359        /**
2360         */
2361        public static final int DAL = 6;
2362        /**
2363         */
2364        public static final int DALATH_RISH = 7;
2365        /**
2366         */
2367        public static final int E = 8;
2368        /**
2369         */
2370        public static final int FEH = 9;
2371        /**
2372         */
2373        public static final int FINAL_SEMKATH = 10;
2374        /**
2375         */
2376        public static final int GAF = 11;
2377        /**
2378         */
2379        public static final int GAMAL = 12;
2380        /**
2381         */
2382        public static final int HAH = 13;
2383        /***/
2384        public static final int TEH_MARBUTA_GOAL = 14;
2385        /**
2386         */
2387        public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2388        /**
2389         */
2390        public static final int HE = 15;
2391        /**
2392         */
2393        public static final int HEH = 16;
2394        /**
2395         */
2396        public static final int HEH_GOAL = 17;
2397        /**
2398         */
2399        public static final int HETH = 18;
2400        /**
2401         */
2402        public static final int KAF = 19;
2403        /**
2404         */
2405        public static final int KAPH = 20;
2406        /**
2407         */
2408        public static final int KNOTTED_HEH = 21;
2409        /**
2410         */
2411        public static final int LAM = 22;
2412        /**
2413         */
2414        public static final int LAMADH = 23;
2415        /**
2416         */
2417        public static final int MEEM = 24;
2418        /**
2419         */
2420        public static final int MIM = 25;
2421        /**
2422         */
2423        public static final int NOON = 26;
2424        /**
2425         */
2426        public static final int NUN = 27;
2427        /**
2428         */
2429        public static final int PE = 28;
2430        /**
2431         */
2432        public static final int QAF = 29;
2433        /**
2434         */
2435        public static final int QAPH = 30;
2436        /**
2437         */
2438        public static final int REH = 31;
2439        /**
2440         */
2441        public static final int REVERSED_PE = 32;
2442        /**
2443         */
2444        public static final int SAD = 33;
2445        /**
2446         */
2447        public static final int SADHE = 34;
2448        /**
2449         */
2450        public static final int SEEN = 35;
2451        /**
2452         */
2453        public static final int SEMKATH = 36;
2454        /**
2455         */
2456        public static final int SHIN = 37;
2457        /**
2458         */
2459        public static final int SWASH_KAF = 38;
2460        /**
2461         */
2462        public static final int SYRIAC_WAW = 39;
2463        /**
2464         */
2465        public static final int TAH = 40;
2466        /**
2467         */
2468        public static final int TAW = 41;
2469        /**
2470         */
2471        public static final int TEH_MARBUTA = 42;
2472        /**
2473         */
2474        public static final int TETH = 43;
2475        /**
2476         */
2477        public static final int WAW = 44;
2478        /**
2479         */
2480        public static final int YEH = 45;
2481        /**
2482         */
2483        public static final int YEH_BARREE = 46;
2484        /**
2485         */
2486        public static final int YEH_WITH_TAIL = 47;
2487        /**
2488         */
2489        public static final int YUDH = 48;
2490        /**
2491         */
2492        public static final int YUDH_HE = 49;
2493        /**
2494         */
2495        public static final int ZAIN = 50;
2496        /**
2497         */
2498        public static final int FE = 51;
2499        /**
2500         */
2501        public static final int KHAPH = 52;
2502        /**
2503         */
2504        public static final int ZHAIN = 53;
2505        /**
2506         */
2507        public static final int BURUSHASKI_YEH_BARREE = 54;
2508        /***/
2509        public static final int FARSI_YEH = 55;
2510        /***/
2511        public static final int NYA = 56;
2512        /***/
2513        public static final int ROHINGYA_YEH = 57;
2514
2515        /***/
2516        public static final int MANICHAEAN_ALEPH = 58;
2517        /***/
2518        public static final int MANICHAEAN_AYIN = 59;
2519        /***/
2520        public static final int MANICHAEAN_BETH = 60;
2521        /***/
2522        public static final int MANICHAEAN_DALETH = 61;
2523        /***/
2524        public static final int MANICHAEAN_DHAMEDH = 62;
2525        /***/
2526        public static final int MANICHAEAN_FIVE = 63;
2527        /***/
2528        public static final int MANICHAEAN_GIMEL = 64;
2529        /***/
2530        public static final int MANICHAEAN_HETH = 65;
2531        /***/
2532        public static final int MANICHAEAN_HUNDRED = 66;
2533        /***/
2534        public static final int MANICHAEAN_KAPH = 67;
2535        /***/
2536        public static final int MANICHAEAN_LAMEDH = 68;
2537        /***/
2538        public static final int MANICHAEAN_MEM = 69;
2539        /***/
2540        public static final int MANICHAEAN_NUN = 70;
2541        /***/
2542        public static final int MANICHAEAN_ONE = 71;
2543        /***/
2544        public static final int MANICHAEAN_PE = 72;
2545        /***/
2546        public static final int MANICHAEAN_QOPH = 73;
2547        /***/
2548        public static final int MANICHAEAN_RESH = 74;
2549        /***/
2550        public static final int MANICHAEAN_SADHE = 75;
2551        /***/
2552        public static final int MANICHAEAN_SAMEKH = 76;
2553        /***/
2554        public static final int MANICHAEAN_TAW = 77;
2555        /***/
2556        public static final int MANICHAEAN_TEN = 78;
2557        /***/
2558        public static final int MANICHAEAN_TETH = 79;
2559        /***/
2560        public static final int MANICHAEAN_THAMEDH = 80;
2561        /***/
2562        public static final int MANICHAEAN_TWENTY = 81;
2563        /***/
2564        public static final int MANICHAEAN_WAW = 82;
2565        /***/
2566        public static final int MANICHAEAN_YODH = 83;
2567        /***/
2568        public static final int MANICHAEAN_ZAYIN = 84;
2569        /***/
2570        public static final int STRAIGHT_WAW = 85;
2571
2572        /***/
2573        public static final int AFRICAN_FEH = 86;
2574        /***/
2575        public static final int AFRICAN_NOON = 87;
2576        /***/
2577        public static final int AFRICAN_QAF = 88;
2578
2579        /**
2580         * One more than the highest normal JoiningGroup value.
2581         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup).
2582         *
2583         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2584         * @hide unsupported on Android
2585         */
2586        @Deprecated
2587        public static final int COUNT = 89;
2588    }
2589
2590    /**
2591     * Grapheme Cluster Break constants.
2592     * @see UProperty#GRAPHEME_CLUSTER_BREAK
2593     */
2594    public static interface GraphemeClusterBreak {
2595        /**
2596         */
2597        public static final int OTHER = 0;
2598        /**
2599         */
2600        public static final int CONTROL = 1;
2601        /**
2602         */
2603        public static final int CR = 2;
2604        /**
2605         */
2606        public static final int EXTEND = 3;
2607        /**
2608         */
2609        public static final int L = 4;
2610        /**
2611         */
2612        public static final int LF = 5;
2613        /**
2614         */
2615        public static final int LV = 6;
2616        /**
2617         */
2618        public static final int LVT = 7;
2619        /**
2620         */
2621        public static final int T = 8;
2622        /**
2623         */
2624        public static final int V = 9;
2625        /**
2626         */
2627        public static final int SPACING_MARK = 10;
2628        /**
2629         */
2630        public static final int PREPEND = 11;
2631        /***/
2632        public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2633        /***/
2634        public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2635        /***/
2636        public static final int E_BASE_GAZ = 14;      /*[EBG]*/
2637        /***/
2638        public static final int E_MODIFIER = 15;      /*[EM]*/
2639        /***/
2640        public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
2641        /***/
2642        public static final int ZWJ = 17;             /*[ZWJ]*/
2643        /**
2644         * One more than the highest normal GraphemeClusterBreak value.
2645         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
2646         *
2647         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2648         * @hide unsupported on Android
2649         */
2650        @Deprecated
2651        public static final int COUNT = 18;
2652    }
2653
2654    /**
2655     * Word Break constants.
2656     * @see UProperty#WORD_BREAK
2657     */
2658    public static interface WordBreak {
2659        /**
2660         */
2661        public static final int OTHER = 0;
2662        /**
2663         */
2664        public static final int ALETTER = 1;
2665        /**
2666         */
2667        public static final int FORMAT = 2;
2668        /**
2669         */
2670        public static final int KATAKANA = 3;
2671        /**
2672         */
2673        public static final int MIDLETTER = 4;
2674        /**
2675         */
2676        public static final int MIDNUM = 5;
2677        /**
2678         */
2679        public static final int NUMERIC = 6;
2680        /**
2681         */
2682        public static final int EXTENDNUMLET = 7;
2683        /**
2684         */
2685        public static final int CR = 8;
2686        /**
2687         */
2688        public static final int EXTEND = 9;
2689        /**
2690         */
2691        public static final int LF = 10;
2692        /**
2693         */
2694        public static final int MIDNUMLET = 11;
2695        /**
2696         */
2697        public static final int NEWLINE = 12;
2698        /***/
2699        public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2700        /***/
2701        public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
2702        /***/
2703        public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
2704        /***/
2705        public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
2706        /***/
2707        public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2708        /***/
2709        public static final int E_BASE_GAZ = 18;       /*[EBG]*/
2710        /***/
2711        public static final int E_MODIFIER = 19;       /*[EM]*/
2712        /***/
2713        public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
2714        /***/
2715        public static final int ZWJ = 21;              /*[ZWJ]*/
2716        /**
2717         * One more than the highest normal WordBreak value.
2718         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
2719         *
2720         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2721         * @hide unsupported on Android
2722         */
2723        @Deprecated
2724        public static final int COUNT = 22;
2725    }
2726
2727    /**
2728     * Sentence Break constants.
2729     * @see UProperty#SENTENCE_BREAK
2730     */
2731    public static interface SentenceBreak {
2732        /**
2733         */
2734        public static final int OTHER = 0;
2735        /**
2736         */
2737        public static final int ATERM = 1;
2738        /**
2739         */
2740        public static final int CLOSE = 2;
2741        /**
2742         */
2743        public static final int FORMAT = 3;
2744        /**
2745         */
2746        public static final int LOWER = 4;
2747        /**
2748         */
2749        public static final int NUMERIC = 5;
2750        /**
2751         */
2752        public static final int OLETTER = 6;
2753        /**
2754         */
2755        public static final int SEP = 7;
2756        /**
2757         */
2758        public static final int SP = 8;
2759        /**
2760         */
2761        public static final int STERM = 9;
2762        /**
2763         */
2764        public static final int UPPER = 10;
2765        /**
2766         */
2767        public static final int CR = 11;
2768        /**
2769         */
2770        public static final int EXTEND = 12;
2771        /**
2772         */
2773        public static final int LF = 13;
2774        /**
2775         */
2776        public static final int SCONTINUE = 14;
2777        /**
2778         * One more than the highest normal SentenceBreak value.
2779         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
2780         *
2781         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2782         * @hide unsupported on Android
2783         */
2784        @Deprecated
2785        public static final int COUNT = 15;
2786    }
2787
2788    /**
2789     * Line Break constants.
2790     * @see UProperty#LINE_BREAK
2791     */
2792    public static interface LineBreak
2793    {
2794        /**
2795         */
2796        public static final int UNKNOWN = 0;
2797        /**
2798         */
2799        public static final int AMBIGUOUS = 1;
2800        /**
2801         */
2802        public static final int ALPHABETIC = 2;
2803        /**
2804         */
2805        public static final int BREAK_BOTH = 3;
2806        /**
2807         */
2808        public static final int BREAK_AFTER = 4;
2809        /**
2810         */
2811        public static final int BREAK_BEFORE = 5;
2812        /**
2813         */
2814        public static final int MANDATORY_BREAK = 6;
2815        /**
2816         */
2817        public static final int CONTINGENT_BREAK = 7;
2818        /**
2819         */
2820        public static final int CLOSE_PUNCTUATION = 8;
2821        /**
2822         */
2823        public static final int COMBINING_MARK = 9;
2824        /**
2825         */
2826        public static final int CARRIAGE_RETURN = 10;
2827        /**
2828         */
2829        public static final int EXCLAMATION = 11;
2830        /**
2831         */
2832        public static final int GLUE = 12;
2833        /**
2834         */
2835        public static final int HYPHEN = 13;
2836        /**
2837         */
2838        public static final int IDEOGRAPHIC = 14;
2839        /**
2840         * @see #INSEPARABLE
2841         */
2842        public static final int INSEPERABLE = 15;
2843        /**
2844         * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
2845         */
2846        public static final int INSEPARABLE = 15;
2847        /**
2848         */
2849        public static final int INFIX_NUMERIC = 16;
2850        /**
2851         */
2852        public static final int LINE_FEED = 17;
2853        /**
2854         */
2855        public static final int NONSTARTER = 18;
2856        /**
2857         */
2858        public static final int NUMERIC = 19;
2859        /**
2860         */
2861        public static final int OPEN_PUNCTUATION = 20;
2862        /**
2863         */
2864        public static final int POSTFIX_NUMERIC = 21;
2865        /**
2866         */
2867        public static final int PREFIX_NUMERIC = 22;
2868        /**
2869         */
2870        public static final int QUOTATION = 23;
2871        /**
2872         */
2873        public static final int COMPLEX_CONTEXT = 24;
2874        /**
2875         */
2876        public static final int SURROGATE = 25;
2877        /**
2878         */
2879        public static final int SPACE = 26;
2880        /**
2881         */
2882        public static final int BREAK_SYMBOLS = 27;
2883        /**
2884         */
2885        public static final int ZWSPACE = 28;
2886        /**
2887         */
2888        public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2889        /**
2890         */
2891        public static final int WORD_JOINER = 30;      /*[WJ]*/
2892        /**
2893         */
2894        public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
2895        /**
2896         */
2897        public static final int H3 = 32;
2898        /**
2899         */
2900        public static final int JL = 33;
2901        /**
2902         */
2903        public static final int JT = 34;
2904        /**
2905         */
2906        public static final int JV = 35;
2907        /***/
2908        public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
2909        /***/
2910        public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
2911        /***/
2912        public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
2913        /***/
2914        public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2915        /***/
2916        public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2917        /***/
2918        public static final int E_MODIFIER = 41;  /*[EM]*/
2919        /***/
2920        public static final int ZWJ = 42;  /*[ZWJ]*/
2921        /**
2922         * One more than the highest normal LineBreak value.
2923         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
2924         *
2925         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2926         * @hide unsupported on Android
2927         */
2928        @Deprecated
2929        public static final int COUNT = 43;
2930    }
2931
2932    /**
2933     * Numeric Type constants.
2934     * @see UProperty#NUMERIC_TYPE
2935     */
2936    public static interface NumericType
2937    {
2938        /**
2939         */
2940        public static final int NONE = 0;
2941        /**
2942         */
2943        public static final int DECIMAL = 1;
2944        /**
2945         */
2946        public static final int DIGIT = 2;
2947        /**
2948         */
2949        public static final int NUMERIC = 3;
2950        /**
2951         * One more than the highest normal NumericType value.
2952         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
2953         *
2954         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2955         * @hide unsupported on Android
2956         */
2957        @Deprecated
2958        public static final int COUNT = 4;
2959    }
2960
2961    /**
2962     * Hangul Syllable Type constants.
2963     *
2964     * @see UProperty#HANGUL_SYLLABLE_TYPE
2965     */
2966    public static interface HangulSyllableType
2967    {
2968        /**
2969         */
2970        public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
2971        /**
2972         */
2973        public static final int LEADING_JAMO        = 1;   /*[L]*/
2974        /**
2975         */
2976        public static final int VOWEL_JAMO          = 2;   /*[V]*/
2977        /**
2978         */
2979        public static final int TRAILING_JAMO       = 3;   /*[T]*/
2980        /**
2981         */
2982        public static final int LV_SYLLABLE         = 4;   /*[LV]*/
2983        /**
2984         */
2985        public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
2986        /**
2987         * One more than the highest normal HangulSyllableType value.
2988         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
2989         *
2990         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2991         * @hide unsupported on Android
2992         */
2993        @Deprecated
2994        public static final int COUNT               = 6;
2995    }
2996
2997    /**
2998     * Bidi Paired Bracket Type constants.
2999     *
3000     * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3001     */
3002    public static interface BidiPairedBracketType {
3003        /**
3004         * Not a paired bracket.
3005         */
3006        public static final int NONE = 0;
3007        /**
3008         * Open paired bracket.
3009         */
3010        public static final int OPEN = 1;
3011        /**
3012         * Close paired bracket.
3013         */
3014        public static final int CLOSE = 2;
3015        /**
3016         * One more than the highest normal BidiPairedBracketType value.
3017         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3018         *
3019         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3020         * @hide unsupported on Android
3021         */
3022        @Deprecated
3023        public static final int COUNT = 3;
3024    }
3025
3026    // public data members -----------------------------------------------
3027
3028    /**
3029     * The lowest Unicode code point value, constant 0.
3030     * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3031     */
3032    public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3033
3034    /**
3035     * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3036     * Same as {@link Character#MAX_CODE_POINT}.
3037     *
3038     * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3039     * which is still a char with the value U+FFFF.
3040     */
3041    public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3042
3043    /**
3044     * The minimum value for Supplementary code points, constant U+10000.
3045     * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3046     */
3047    public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3048
3049    /**
3050     * Unicode value used when translating into Unicode encoding form and there
3051     * is no existing character.
3052     */
3053    public static final int REPLACEMENT_CHAR = '\uFFFD';
3054
3055    /**
3056     * Special value that is returned by getUnicodeNumericValue(int) when no
3057     * numeric value is defined for a code point.
3058     * @see #getUnicodeNumericValue
3059     */
3060    public static final double NO_NUMERIC_VALUE = -123456789;
3061
3062    /**
3063     * Compatibility constant for Java Character's MIN_RADIX.
3064     */
3065    public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3066
3067    /**
3068     * Compatibility constant for Java Character's MAX_RADIX.
3069     */
3070    public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3071
3072    /**
3073     * Do not lowercase non-initial parts of words when titlecasing.
3074     * Option bit for titlecasing APIs that take an options bit set.
3075     *
3076     * By default, titlecasing will titlecase the first cased character
3077     * of a word and lowercase all other characters.
3078     * With this option, the other characters will not be modified.
3079     *
3080     * @see #toTitleCase
3081     */
3082    public static final int TITLECASE_NO_LOWERCASE = 0x100;
3083
3084    /**
3085     * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3086     * titlecase exactly the characters at breaks from the iterator.
3087     * Option bit for titlecasing APIs that take an options bit set.
3088     *
3089     * By default, titlecasing will take each break iterator index,
3090     * adjust it by looking for the next cased character, and titlecase that one.
3091     * Other characters are lowercased.
3092     *
3093     * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
3094     *
3095     * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3096     * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3097     * cased character F. If F exists, map F to default_title(F); then map each
3098     * subsequent character C to default_lower(C).
3099     *
3100     * @see #toTitleCase
3101     * @see #TITLECASE_NO_LOWERCASE
3102     */
3103    public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3104
3105    // public methods ----------------------------------------------------
3106
3107    /**
3108     * Returnss the numeric value of a decimal digit code point.
3109     * <br>This method observes the semantics of
3110     * <code>java.lang.Character.digit()</code>.  Note that this
3111     * will return positive values for code points for which isDigit
3112     * returns false, just like java.lang.Character.
3113     * <br><em>Semantic Change:</em> In release 1.3.1 and
3114     * prior, this did not treat the European letters as having a
3115     * digit value, and also treated numeric letters and other numbers as
3116     * digits.
3117     * This has been changed to conform to the java semantics.
3118     * <br>A code point is a valid digit if and only if:
3119     * <ul>
3120     *   <li>ch is a decimal digit or one of the european letters, and
3121     *   <li>the value of ch is less than the specified radix.
3122     * </ul>
3123     * @param ch the code point to query
3124     * @param radix the radix
3125     * @return the numeric value represented by the code point in the
3126     * specified radix, or -1 if the code point is not a decimal digit
3127     * or if its value is too large for the radix
3128     */
3129    public static int digit(int ch, int radix)
3130    {
3131        if (2 <= radix && radix <= 36) {
3132            int value = digit(ch);
3133            if (value < 0) {
3134                // ch is not a decimal digit, try latin letters
3135                value = UCharacterProperty.getEuropeanDigit(ch);
3136            }
3137            return (value < radix) ? value : -1;
3138        } else {
3139            return -1;  // invalid radix
3140        }
3141    }
3142
3143    /**
3144     * Returnss the numeric value of a decimal digit code point.
3145     * <br>This is a convenience overload of <code>digit(int, int)</code>
3146     * that provides a decimal radix.
3147     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3148     * treated numeric letters and other numbers as digits.  This has
3149     * been changed to conform to the java semantics.
3150     * @param ch the code point to query
3151     * @return the numeric value represented by the code point,
3152     * or -1 if the code point is not a decimal digit or if its
3153     * value is too large for a decimal radix
3154     */
3155    public static int digit(int ch)
3156    {
3157        return UCharacterProperty.INSTANCE.digit(ch);
3158    }
3159
3160    /**
3161     * Returns the numeric value of the code point as a nonnegative
3162     * integer.
3163     * <br>If the code point does not have a numeric value, then -1 is returned.
3164     * <br>
3165     * If the code point has a numeric value that cannot be represented as a
3166     * nonnegative integer (for example, a fractional value), then -2 is
3167     * returned.
3168     * @param ch the code point to query
3169     * @return the numeric value of the code point, or -1 if it has no numeric
3170     * value, or -2 if it has a numeric value that cannot be represented as a
3171     * nonnegative integer
3172     */
3173    public static int getNumericValue(int ch)
3174    {
3175        return UCharacterProperty.INSTANCE.getNumericValue(ch);
3176    }
3177
3178    /**
3179     * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
3180     * Unicode Character Database.
3181     * <p>A "double" return type is necessary because some numeric values are
3182     * fractions, negative, or too large for int.
3183     * <p>For characters without any numeric values in the Unicode Character
3184     * Database, this function will return NO_NUMERIC_VALUE.
3185     * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3186     * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3187     * return type int and returns -1 when the argument ch does not have a
3188     * corresponding numeric value. This has been changed to synch with ICU4C
3189     *
3190     * This corresponds to the ICU4C function u_getNumericValue.
3191     * @param ch Code point to get the numeric value for.
3192     * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3193     */
3194    public static double getUnicodeNumericValue(int ch)
3195    {
3196        return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3197    }
3198
3199    /**
3200     * Compatibility override of Java deprecated method.  This
3201     * method will always remain deprecated.
3202     * Same as java.lang.Character.isSpace().
3203     * @param ch the code point
3204     * @return true if the code point is a space character as
3205     * defined by java.lang.Character.isSpace.
3206     * @deprecated ICU 3.4 (Java)
3207     * @hide original deprecated declaration
3208     */
3209    @Deprecated
3210    public static boolean isSpace(int ch) {
3211        return ch <= 0x20 &&
3212                (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3213    }
3214
3215    /**
3216     * Returns a value indicating a code point's Unicode category.
3217     * Up-to-date Unicode implementation of java.lang.Character.getType()
3218     * except for the above mentioned code points that had their category
3219     * changed.<br>
3220     * Return results are constants from the interface
3221     * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3222     * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3223     * those returned by java.lang.Character.getType.  UCharacterCategory values
3224     * match the ones used in ICU4C, while java.lang.Character type
3225     * values, though similar, skip the value 17.
3226     * @param ch code point whose type is to be determined
3227     * @return category which is a value of UCharacterCategory
3228     */
3229    public static int getType(int ch)
3230    {
3231        return UCharacterProperty.INSTANCE.getType(ch);
3232    }
3233
3234    /**
3235     * Determines if a code point has a defined meaning in the up-to-date
3236     * Unicode standard.
3237     * E.g. supplementary code points though allocated space are not defined in
3238     * Unicode yet.<br>
3239     * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3240     * @param ch code point to be determined if it is defined in the most
3241     *        current version of Unicode
3242     * @return true if this code point is defined in unicode
3243     */
3244    public static boolean isDefined(int ch)
3245    {
3246        return getType(ch) != 0;
3247    }
3248
3249    /**
3250     * Determines if a code point is a Java digit.
3251     * <br>This method observes the semantics of
3252     * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3253     * digits only.
3254     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3255     * numeric letters and other numbers as digits.
3256     * This has been changed to conform to the java semantics.
3257     * @param ch code point to query
3258     * @return true if this code point is a digit
3259     */
3260    public static boolean isDigit(int ch)
3261    {
3262        return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3263    }
3264
3265    /**
3266     * Determines if the specified code point is an ISO control character.
3267     * A code point is considered to be an ISO control character if it is in
3268     * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3269     * &#92;u009F.<br>
3270     * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3271     * @param ch code point to determine if it is an ISO control character
3272     * @return true if code point is a ISO control character
3273     */
3274    public static boolean isISOControl(int ch)
3275    {
3276        return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3277                ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3278    }
3279
3280    /**
3281     * Determines if the specified code point is a letter.
3282     * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3283     * @param ch code point to determine if it is a letter
3284     * @return true if code point is a letter
3285     */
3286    public static boolean isLetter(int ch)
3287    {
3288        // if props == 0, it will just fall through and return false
3289        return ((1 << getType(ch))
3290                & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3291                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3292                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3293                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3294                        | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3295    }
3296
3297    /**
3298     * Determines if the specified code point is a letter or digit.
3299     * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
3300     * characters 'A' - 'Z' and 'a' - 'z' as digits.
3301     * @param ch code point to determine if it is a letter or a digit
3302     * @return true if code point is a letter or a digit
3303     */
3304    public static boolean isLetterOrDigit(int ch)
3305    {
3306        return ((1 << getType(ch))
3307                & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3308                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3309                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3310                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3311                        | (1 << UCharacterCategory.OTHER_LETTER)
3312                        | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3313    }
3314
3315    /**
3316     * Compatibility override of Java deprecated method.  This
3317     * method will always remain deprecated.  Delegates to
3318     * java.lang.Character.isJavaIdentifierStart.
3319     * @param cp the code point
3320     * @return true if the code point can start a java identifier.
3321     * @deprecated ICU 3.4 (Java)
3322     * @hide original deprecated declaration
3323     */
3324    @Deprecated
3325    public static boolean isJavaLetter(int cp) {
3326        return isJavaIdentifierStart(cp);
3327    }
3328
3329    /**
3330     * Compatibility override of Java deprecated method.  This
3331     * method will always remain deprecated.  Delegates to
3332     * java.lang.Character.isJavaIdentifierPart.
3333     * @param cp the code point
3334     * @return true if the code point can continue a java identifier.
3335     * @deprecated ICU 3.4 (Java)
3336     * @hide original deprecated declaration
3337     */
3338    @Deprecated
3339    public static boolean isJavaLetterOrDigit(int cp) {
3340        return isJavaIdentifierPart(cp);
3341    }
3342
3343    /**
3344     * Compatibility override of Java method, delegates to
3345     * java.lang.Character.isJavaIdentifierStart.
3346     * @param cp the code point
3347     * @return true if the code point can start a java identifier.
3348     */
3349    public static boolean isJavaIdentifierStart(int cp) {
3350        // note, downcast to char for jdk 1.4 compatibility
3351        return java.lang.Character.isJavaIdentifierStart((char)cp);
3352    }
3353
3354    /**
3355     * Compatibility override of Java method, delegates to
3356     * java.lang.Character.isJavaIdentifierPart.
3357     * @param cp the code point
3358     * @return true if the code point can continue a java identifier.
3359     */
3360    public static boolean isJavaIdentifierPart(int cp) {
3361        // note, downcast to char for jdk 1.4 compatibility
3362        return java.lang.Character.isJavaIdentifierPart((char)cp);
3363    }
3364
3365    /**
3366     * Determines if the specified code point is a lowercase character.
3367     * UnicodeData only contains case mappings for code points where they are
3368     * one-to-one mappings; it also omits information about context-sensitive
3369     * case mappings.<br> For more information about Unicode case mapping
3370     * please refer to the
3371     * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3372     * #21</a>.<br>
3373     * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3374     * @param ch code point to determine if it is in lowercase
3375     * @return true if code point is a lowercase character
3376     */
3377    public static boolean isLowerCase(int ch)
3378    {
3379        // if props == 0, it will just fall through and return false
3380        return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3381    }
3382
3383    /**
3384     * Determines if the specified code point is a white space character.
3385     * A code point is considered to be an whitespace character if and only
3386     * if it satisfies one of the following criteria:
3387     * <ul>
3388     * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3389     *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3390     * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3391     * <li> It is &#92;u000A, LINE FEED.
3392     * <li> It is &#92;u000B, VERTICAL TABULATION.
3393     * <li> It is &#92;u000C, FORM FEED.
3394     * <li> It is &#92;u000D, CARRIAGE RETURN.
3395     * <li> It is &#92;u001C, FILE SEPARATOR.
3396     * <li> It is &#92;u001D, GROUP SEPARATOR.
3397     * <li> It is &#92;u001E, RECORD SEPARATOR.
3398     * <li> It is &#92;u001F, UNIT SEPARATOR.
3399     * </ul>
3400     *
3401     * This API tries to sync with the semantics of Java's
3402     * java.lang.Character.isWhitespace(), but it may not return
3403     * the exact same results because of the Unicode version
3404     * difference.
3405     * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3406     * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3407     * See http://www.unicode.org/versions/Unicode4.0.1/
3408     * @param ch code point to determine if it is a white space
3409     * @return true if the specified code point is a white space character
3410     */
3411    public static boolean isWhitespace(int ch)
3412    {
3413        // exclude no-break spaces
3414        // if props == 0, it will just fall through and return false
3415        return ((1 << getType(ch)) &
3416                ((1 << UCharacterCategory.SPACE_SEPARATOR)
3417                        | (1 << UCharacterCategory.LINE_SEPARATOR)
3418                        | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3419                        && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3420                        // TAB VT LF FF CR FS GS RS US NL are all control characters
3421                        // that are white spaces.
3422                        || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3423    }
3424
3425    /**
3426     * Determines if the specified code point is a Unicode specified space
3427     * character, i.e. if code point is in the category Zs, Zl and Zp.
3428     * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3429     * @param ch code point to determine if it is a space
3430     * @return true if the specified code point is a space character
3431     */
3432    public static boolean isSpaceChar(int ch)
3433    {
3434        // if props == 0, it will just fall through and return false
3435        return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3436                | (1 << UCharacterCategory.LINE_SEPARATOR)
3437                | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3438                != 0;
3439    }
3440
3441    /**
3442     * Determines if the specified code point is a titlecase character.
3443     * UnicodeData only contains case mappings for code points where they are
3444     * one-to-one mappings; it also omits information about context-sensitive
3445     * case mappings.<br>
3446     * For more information about Unicode case mapping please refer to the
3447     * <a href=http://www.unicode.org/unicode/reports/tr21/>
3448     * Technical report #21</a>.<br>
3449     * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3450     * @param ch code point to determine if it is in title case
3451     * @return true if the specified code point is a titlecase character
3452     */
3453    public static boolean isTitleCase(int ch)
3454    {
3455        // if props == 0, it will just fall through and return false
3456        return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3457    }
3458
3459    /**
3460     * Determines if the specified code point may be any part of a Unicode
3461     * identifier other than the starting character.
3462     * A code point may be part of a Unicode identifier if and only if it is
3463     * one of the following:
3464     * <ul>
3465     * <li> Lu Uppercase letter
3466     * <li> Ll Lowercase letter
3467     * <li> Lt Titlecase letter
3468     * <li> Lm Modifier letter
3469     * <li> Lo Other letter
3470     * <li> Nl Letter number
3471     * <li> Pc Connecting punctuation character
3472     * <li> Nd decimal number
3473     * <li> Mc Spacing combining mark
3474     * <li> Mn Non-spacing mark
3475     * <li> Cf formatting code
3476     * </ul>
3477     * Up-to-date Unicode implementation of
3478     * java.lang.Character.isUnicodeIdentifierPart().<br>
3479     * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3480     * @param ch code point to determine if is can be part of a Unicode
3481     *        identifier
3482     * @return true if code point is any character belonging a unicode
3483     *         identifier suffix after the first character
3484     */
3485    public static boolean isUnicodeIdentifierPart(int ch)
3486    {
3487        // if props == 0, it will just fall through and return false
3488        // cat == format
3489        return ((1 << getType(ch))
3490                & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3491                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3492                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3493                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3494                        | (1 << UCharacterCategory.OTHER_LETTER)
3495                        | (1 << UCharacterCategory.LETTER_NUMBER)
3496                        | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3497                        | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3498                        | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3499                        | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3500                        || isIdentifierIgnorable(ch);
3501    }
3502
3503    /**
3504     * Determines if the specified code point is permissible as the first
3505     * character in a Unicode identifier.
3506     * A code point may start a Unicode identifier if it is of type either
3507     * <ul>
3508     * <li> Lu Uppercase letter
3509     * <li> Ll Lowercase letter
3510     * <li> Lt Titlecase letter
3511     * <li> Lm Modifier letter
3512     * <li> Lo Other letter
3513     * <li> Nl Letter number
3514     * </ul>
3515     * Up-to-date Unicode implementation of
3516     * java.lang.Character.isUnicodeIdentifierStart().<br>
3517     * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3518     * @param ch code point to determine if it can start a Unicode identifier
3519     * @return true if code point is the first character belonging a unicode
3520     *              identifier
3521     */
3522    public static boolean isUnicodeIdentifierStart(int ch)
3523    {
3524        /*int cat = getType(ch);*/
3525        // if props == 0, it will just fall through and return false
3526        return ((1 << getType(ch))
3527                & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3528                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3529                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3530                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3531                        | (1 << UCharacterCategory.OTHER_LETTER)
3532                        | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3533    }
3534
3535    /**
3536     * Determines if the specified code point should be regarded as an
3537     * ignorable character in a Java identifier.
3538     * A character is Java-identifier-ignorable if it has the general category
3539     * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3540     * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3541     * Up-to-date Unicode implementation of
3542     * java.lang.Character.isIdentifierIgnorable().<br>
3543     * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3544     * <p>Note that Unicode just recommends to ignore Cf (format controls).
3545     * @param ch code point to be determined if it can be ignored in a Unicode
3546     *        identifier.
3547     * @return true if the code point is ignorable
3548     */
3549    public static boolean isIdentifierIgnorable(int ch)
3550    {
3551        // see java.lang.Character.isIdentifierIgnorable() on range of
3552        // ignorable characters.
3553        if (ch <= 0x9f) {
3554            return isISOControl(ch)
3555                    && !((ch >= 0x9 && ch <= 0xd)
3556                            || (ch >= 0x1c && ch <= 0x1f));
3557        }
3558        return getType(ch) == UCharacterCategory.FORMAT;
3559    }
3560
3561    /**
3562     * Determines if the specified code point is an uppercase character.
3563     * UnicodeData only contains case mappings for code point where they are
3564     * one-to-one mappings; it also omits information about context-sensitive
3565     * case mappings.<br>
3566     * For language specific case conversion behavior, use
3567     * toUpperCase(locale, str). <br>
3568     * For example, the case conversion for dot-less i and dotted I in Turkish,
3569     * or for final sigma in Greek.
3570     * For more information about Unicode case mapping please refer to the
3571     * <a href=http://www.unicode.org/unicode/reports/tr21/>
3572     * Technical report #21</a>.<br>
3573     * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3574     * @param ch code point to determine if it is in uppercase
3575     * @return true if the code point is an uppercase character
3576     */
3577    public static boolean isUpperCase(int ch)
3578    {
3579        // if props == 0, it will just fall through and return false
3580        return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3581    }
3582
3583    /**
3584     * The given code point is mapped to its lowercase equivalent; if the code
3585     * point has no lowercase equivalent, the code point itself is returned.
3586     * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3587     *
3588     * <p>This function only returns the simple, single-code point case mapping.
3589     * Full case mappings should be used whenever possible because they produce
3590     * better results by working on whole strings.
3591     * They take into account the string context and the language and can map
3592     * to a result string with a different length as appropriate.
3593     * Full case mappings are applied by the case mapping functions
3594     * that take String parameters rather than code points (int).
3595     * See also the User Guide chapter on C/POSIX migration:
3596     * http://www.icu-project.org/userguide/posix.html#case_mappings
3597     *
3598     * @param ch code point whose lowercase equivalent is to be retrieved
3599     * @return the lowercase equivalent code point
3600     */
3601    public static int toLowerCase(int ch) {
3602        return UCaseProps.INSTANCE.tolower(ch);
3603    }
3604
3605    /**
3606     * Converts argument code point and returns a String object representing
3607     * the code point's value in UTF-16 format.
3608     * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
3609     *
3610     * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
3611     *
3612     * @param ch code point
3613     * @return string representation of the code point, null if code point is not
3614     *         defined in unicode
3615     */
3616    public static String toString(int ch)
3617    {
3618        if (ch < MIN_VALUE || ch > MAX_VALUE) {
3619            return null;
3620        }
3621
3622        if (ch < SUPPLEMENTARY_MIN_VALUE) {
3623            return String.valueOf((char)ch);
3624        }
3625
3626        return new String(Character.toChars(ch));
3627    }
3628
3629    /**
3630     * Converts the code point argument to titlecase.
3631     * If no titlecase is available, the uppercase is returned. If no uppercase
3632     * is available, the code point itself is returned.
3633     * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3634     *
3635     * <p>This function only returns the simple, single-code point case mapping.
3636     * Full case mappings should be used whenever possible because they produce
3637     * better results by working on whole strings.
3638     * They take into account the string context and the language and can map
3639     * to a result string with a different length as appropriate.
3640     * Full case mappings are applied by the case mapping functions
3641     * that take String parameters rather than code points (int).
3642     * See also the User Guide chapter on C/POSIX migration:
3643     * http://www.icu-project.org/userguide/posix.html#case_mappings
3644     *
3645     * @param ch code point  whose title case is to be retrieved
3646     * @return titlecase code point
3647     */
3648    public static int toTitleCase(int ch) {
3649        return UCaseProps.INSTANCE.totitle(ch);
3650    }
3651
3652    /**
3653     * Converts the character argument to uppercase.
3654     * If no uppercase is available, the character itself is returned.
3655     * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3656     *
3657     * <p>This function only returns the simple, single-code point case mapping.
3658     * Full case mappings should be used whenever possible because they produce
3659     * better results by working on whole strings.
3660     * They take into account the string context and the language and can map
3661     * to a result string with a different length as appropriate.
3662     * Full case mappings are applied by the case mapping functions
3663     * that take String parameters rather than code points (int).
3664     * See also the User Guide chapter on C/POSIX migration:
3665     * http://www.icu-project.org/userguide/posix.html#case_mappings
3666     *
3667     * @param ch code point whose uppercase is to be retrieved
3668     * @return uppercase code point
3669     */
3670    public static int toUpperCase(int ch) {
3671        return UCaseProps.INSTANCE.toupper(ch);
3672    }
3673
3674    // extra methods not in java.lang.Character --------------------------
3675
3676    /**
3677     * <strong>[icu]</strong> Determines if the code point is a supplementary character.
3678     * A code point is a supplementary character if and only if it is greater
3679     * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3680     * @param ch code point to be determined if it is in the supplementary
3681     *        plane
3682     * @return true if code point is a supplementary character
3683     */
3684    public static boolean isSupplementary(int ch)
3685    {
3686        return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
3687                ch <= UCharacter.MAX_VALUE;
3688    }
3689
3690    /**
3691     * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
3692     * @param ch code point to be determined if it is not a supplementary
3693     *        character
3694     * @return true if code point is not a supplementary character
3695     */
3696    public static boolean isBMP(int ch)
3697    {
3698        return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3699    }
3700
3701    /**
3702     * <strong>[icu]</strong> Determines whether the specified code point is a printable character
3703     * according to the Unicode standard.
3704     * @param ch code point to be determined if it is printable
3705     * @return true if the code point is a printable character
3706     */
3707    public static boolean isPrintable(int ch)
3708    {
3709        int cat = getType(ch);
3710        // if props == 0, it will just fall through and return false
3711        return (cat != UCharacterCategory.UNASSIGNED &&
3712                cat != UCharacterCategory.CONTROL &&
3713                cat != UCharacterCategory.FORMAT &&
3714                cat != UCharacterCategory.PRIVATE_USE &&
3715                cat != UCharacterCategory.SURROGATE &&
3716                cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3717    }
3718
3719    /**
3720     * <strong>[icu]</strong> Determines whether the specified code point is of base form.
3721     * A code point of base form does not graphically combine with preceding
3722     * characters, and is neither a control nor a format character.
3723     * @param ch code point to be determined if it is of base form
3724     * @return true if the code point is of base form
3725     */
3726    public static boolean isBaseForm(int ch)
3727    {
3728        int cat = getType(ch);
3729        // if props == 0, it will just fall through and return false
3730        return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
3731                cat == UCharacterCategory.OTHER_NUMBER ||
3732                cat == UCharacterCategory.LETTER_NUMBER ||
3733                cat == UCharacterCategory.UPPERCASE_LETTER ||
3734                cat == UCharacterCategory.LOWERCASE_LETTER ||
3735                cat == UCharacterCategory.TITLECASE_LETTER ||
3736                cat == UCharacterCategory.MODIFIER_LETTER ||
3737                cat == UCharacterCategory.OTHER_LETTER ||
3738                cat == UCharacterCategory.NON_SPACING_MARK ||
3739                cat == UCharacterCategory.ENCLOSING_MARK ||
3740                cat == UCharacterCategory.COMBINING_SPACING_MARK;
3741    }
3742
3743    /**
3744     * <strong>[icu]</strong> Returns the Bidirection property of a code point.
3745     * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
3746     * property.<br>
3747     * Result returned belongs to the interface
3748     * <a href=UCharacterDirection.html>UCharacterDirection</a>
3749     * @param ch the code point to be determined its direction
3750     * @return direction constant from UCharacterDirection.
3751     */
3752    public static int getDirection(int ch)
3753    {
3754        return UBiDiProps.INSTANCE.getClass(ch);
3755    }
3756
3757    /**
3758     * Determines whether the code point has the "mirrored" property.
3759     * This property is set for characters that are commonly used in
3760     * Right-To-Left contexts and need to be displayed with a "mirrored"
3761     * glyph.
3762     * @param ch code point whose mirror is to be determined
3763     * @return true if the code point has the "mirrored" property
3764     */
3765    public static boolean isMirrored(int ch)
3766    {
3767        return UBiDiProps.INSTANCE.isMirrored(ch);
3768    }
3769
3770    /**
3771     * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
3772     * For code points with the "mirrored" property, implementations sometimes
3773     * need a "poor man's" mapping to another code point such that the default
3774     * glyph may serve as the mirror-image of the default glyph of the
3775     * specified code point.<br>
3776     * This is useful for text conversion to and from codepages with visual
3777     * order, and for displays without glyph selection capabilities.
3778     * @param ch code point whose mirror is to be retrieved
3779     * @return another code point that may serve as a mirror-image substitute,
3780     *         or ch itself if there is no such mapping or ch does not have the
3781     *         "mirrored" property
3782     */
3783    public static int getMirror(int ch)
3784    {
3785        return UBiDiProps.INSTANCE.getMirror(ch);
3786    }
3787
3788    /**
3789     * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
3790     * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
3791     * Otherwise c itself is returned.
3792     * See http://www.unicode.org/reports/tr9/
3793     *
3794     * @param c the code point to be mapped
3795     * @return the paired bracket code point,
3796     *         or c itself if there is no such mapping
3797     *         (Bidi_Paired_Bracket_Type=None)
3798     *
3799     * @see UProperty#BIDI_PAIRED_BRACKET
3800     * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3801     * @see #getMirror(int)
3802     */
3803    public static int getBidiPairedBracket(int c) {
3804        return UBiDiProps.INSTANCE.getPairedBracket(c);
3805    }
3806
3807    /**
3808     * <strong>[icu]</strong> Returns the combining class of the argument codepoint
3809     * @param ch code point whose combining is to be retrieved
3810     * @return the combining class of the codepoint
3811     */
3812    public static int getCombiningClass(int ch)
3813    {
3814        return Normalizer2.getNFDInstance().getCombiningClass(ch);
3815    }
3816
3817    /**
3818     * <strong>[icu]</strong> A code point is illegal if and only if
3819     * <ul>
3820     * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3821     * <li> A surrogate value, 0xD800 to 0xDFFF
3822     * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3823     * </ul>
3824     * Note: legal does not mean that it is assigned in this version of Unicode.
3825     * @param ch code point to determine if it is a legal code point by itself
3826     * @return true if and only if legal.
3827     */
3828    public static boolean isLegal(int ch)
3829    {
3830        if (ch < MIN_VALUE) {
3831            return false;
3832        }
3833        if (ch < Character.MIN_SURROGATE) {
3834            return true;
3835        }
3836        if (ch <= Character.MAX_SURROGATE) {
3837            return false;
3838        }
3839        if (UCharacterUtility.isNonCharacter(ch)) {
3840            return false;
3841        }
3842        return (ch <= MAX_VALUE);
3843    }
3844
3845    /**
3846     * <strong>[icu]</strong> A string is legal iff all its code points are legal.
3847     * A code point is illegal if and only if
3848     * <ul>
3849     * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3850     * <li> A surrogate value, 0xD800 to 0xDFFF
3851     * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3852     * </ul>
3853     * Note: legal does not mean that it is assigned in this version of Unicode.
3854     * @param str containing code points to examin
3855     * @return true if and only if legal.
3856     */
3857    public static boolean isLegal(String str)
3858    {
3859        int size = str.length();
3860        int codepoint;
3861        for (int i = 0; i < size; i += Character.charCount(codepoint))
3862        {
3863            codepoint = str.codePointAt(i);
3864            if (!isLegal(codepoint)) {
3865                return false;
3866            }
3867        }
3868        return true;
3869    }
3870
3871    /**
3872     * <strong>[icu]</strong> Returns the version of Unicode data used.
3873     * @return the unicode version number used
3874     */
3875    public static VersionInfo getUnicodeVersion()
3876    {
3877        return UCharacterProperty.INSTANCE.m_unicodeVersion_;
3878    }
3879
3880    /**
3881     * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
3882     * null if the character is unassigned or outside the range
3883     * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3884     * <br>
3885     * Note calling any methods related to code point names, e.g. get*Name*()
3886     * incurs a one-time initialisation cost to construct the name tables.
3887     * @param ch the code point for which to get the name
3888     * @return most current Unicode name
3889     */
3890    public static String getName(int ch)
3891    {
3892        return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3893    }
3894
3895    /**
3896     * <strong>[icu]</strong> Returns the names for each of the characters in a string
3897     * @param s string to format
3898     * @param separator string to go between names
3899     * @return string of names
3900     */
3901    public static String getName(String s, String separator) {
3902        if (s.length() == 1) { // handle common case
3903            return getName(s.charAt(0));
3904        }
3905        int cp;
3906        StringBuilder sb = new StringBuilder();
3907        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
3908            cp = s.codePointAt(i);
3909            if (i != 0) sb.append(separator);
3910            sb.append(UCharacter.getName(cp));
3911        }
3912        return sb.toString();
3913    }
3914
3915    /**
3916     * <strong>[icu]</strong> Returns null.
3917     * Used to return the Unicode_1_Name property value which was of little practical value.
3918     * @param ch the code point for which to get the name
3919     * @return null
3920     * @deprecated ICU 49
3921     * @hide original deprecated declaration
3922     */
3923    @Deprecated
3924    public static String getName1_0(int ch)
3925    {
3926        return null;
3927    }
3928
3929    /**
3930     * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
3931     * getName1_0(int), this method will return a name even for codepoints that
3932     * are not assigned a name in UnicodeData.txt.
3933     *
3934     * <p>The names are returned in the following order.
3935     * <ul>
3936     * <li> Most current Unicode name if there is any
3937     * <li> Unicode 1.0 name if there is any
3938     * <li> Extended name in the form of
3939     *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
3940     * </ul>
3941     * Note calling any methods related to code point names, e.g. get*Name*()
3942     * incurs a one-time initialisation cost to construct the name tables.
3943     * @param ch the code point for which to get the name
3944     * @return a name for the argument codepoint
3945     */
3946    public static String getExtendedName(int ch) {
3947        return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
3948    }
3949
3950    /**
3951     * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
3952     * Returns null if the character is unassigned or outside the range
3953     * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3954     * <br>
3955     * Note calling any methods related to code point names, e.g. get*Name*()
3956     * incurs a one-time initialisation cost to construct the name tables.
3957     * @param ch the code point for which to get the name alias
3958     * @return Unicode name alias, or null
3959     */
3960    public static String getNameAlias(int ch)
3961    {
3962        return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
3963    }
3964
3965    /**
3966     * <strong>[icu]</strong> Returns null.
3967     * Used to return the ISO 10646 comment for a character.
3968     * The Unicode ISO_Comment property is deprecated and has no values.
3969     *
3970     * @param ch The code point for which to get the ISO comment.
3971     *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
3972     * @return null
3973     * @deprecated ICU 49
3974     * @hide original deprecated declaration
3975     */
3976    @Deprecated
3977    public static String getISOComment(int ch)
3978    {
3979        return null;
3980    }
3981
3982    /**
3983     * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
3984     * return its code point value. All Unicode names are in uppercase.
3985     * Note calling any methods related to code point names, e.g. get*Name*()
3986     * incurs a one-time initialisation cost to construct the name tables.
3987     * @param name most current Unicode character name whose code point is to
3988     *        be returned
3989     * @return code point or -1 if name is not found
3990     */
3991    public static int getCharFromName(String name){
3992        return UCharacterName.INSTANCE.getCharFromName(
3993                UCharacterNameChoice.UNICODE_CHAR_NAME, name);
3994    }
3995
3996    /**
3997     * <strong>[icu]</strong> Returns -1.
3998     * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
3999     * its code point value.
4000     * @param name Unicode 1.0 code point name whose code point is to be
4001     *             returned
4002     * @return -1
4003     * @deprecated ICU 49
4004     * @see #getName1_0(int)
4005     * @hide original deprecated declaration
4006     */
4007    @Deprecated
4008    public static int getCharFromName1_0(String name){
4009        return -1;
4010    }
4011
4012    /**
4013     * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
4014     * point value. All Unicode names are in uppercase.
4015     * Extended names are all lowercase except for numbers and are contained
4016     * within angle brackets.
4017     * The names are searched in the following order
4018     * <ul>
4019     * <li> Most current Unicode name if there is any
4020     * <li> Unicode 1.0 name if there is any
4021     * <li> Extended name in the form of
4022     *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4023     * </ul>
4024     * Note calling any methods related to code point names, e.g. get*Name*()
4025     * incurs a one-time initialisation cost to construct the name tables.
4026     * @param name codepoint name
4027     * @return code point associated with the name or -1 if the name is not
4028     *         found.
4029     */
4030    public static int getCharFromExtendedName(String name){
4031        return UCharacterName.INSTANCE.getCharFromName(
4032                UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4033    }
4034
4035    /**
4036     * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
4037     * its code point value. All Unicode names are in uppercase.
4038     * Note calling any methods related to code point names, e.g. get*Name*()
4039     * incurs a one-time initialisation cost to construct the name tables.
4040     * @param name Unicode name alias whose code point is to be returned
4041     * @return code point or -1 if name is not found
4042     */
4043    public static int getCharFromNameAlias(String name){
4044        return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4045    }
4046
4047    /**
4048     * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
4049     * Unicode database file PropertyAliases.txt.  Most properties
4050     * have more than one name.  The nameChoice determines which one
4051     * is returned.
4052     *
4053     * In addition, this function maps the property
4054     * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4055     * "General_Category_Mask".  These names are not in
4056     * PropertyAliases.txt.
4057     *
4058     * @param property UProperty selector.
4059     *
4060     * @param nameChoice UProperty.NameChoice selector for which name
4061     * to get.  All properties have a long name.  Most have a short
4062     * name, but some do not.  Unicode allows for additional names; if
4063     * present these will be returned by UProperty.NameChoice.LONG + i,
4064     * where i=1, 2,...
4065     *
4066     * @return a name, or null if Unicode explicitly defines no name
4067     * ("n/a") for a given property/nameChoice.  If a given nameChoice
4068     * throws an exception, then all larger values of nameChoice will
4069     * throw an exception.  If null is returned for a given
4070     * nameChoice, then other nameChoice values may return non-null
4071     * results.
4072     *
4073     * @exception IllegalArgumentException thrown if property or
4074     * nameChoice are invalid.
4075     *
4076     * @see UProperty
4077     * @see UProperty.NameChoice
4078     */
4079    public static String getPropertyName(int property,
4080            int nameChoice) {
4081        return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4082    }
4083
4084    /**
4085     * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
4086     * specified in the Unicode database file PropertyAliases.txt.
4087     * Short, long, and any other variants are recognized.
4088     *
4089     * In addition, this function maps the synthetic names "gcm" /
4090     * "General_Category_Mask" to the property
4091     * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4092     * PropertyAliases.txt.
4093     *
4094     * @param propertyAlias the property name to be matched.  The name
4095     * is compared using "loose matching" as described in
4096     * PropertyAliases.txt.
4097     *
4098     * @return a UProperty enum.
4099     *
4100     * @exception IllegalArgumentException thrown if propertyAlias
4101     * is not recognized.
4102     *
4103     * @see UProperty
4104     */
4105    public static int getPropertyEnum(CharSequence propertyAlias) {
4106        int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4107        if (propEnum == UProperty.UNDEFINED) {
4108            throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4109        }
4110        return propEnum;
4111    }
4112
4113    /**
4114     * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
4115     * the Unicode database file PropertyValueAliases.txt.  Most
4116     * values have more than one name.  The nameChoice determines
4117     * which one is returned.
4118     *
4119     * Note: Some of the names in PropertyValueAliases.txt can only be
4120     * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4121     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4122     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4123     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4124     *
4125     * @param property UProperty selector constant.
4126     * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4127     * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4128     * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4129     * If out of range, null is returned.
4130     *
4131     * @param value selector for a value for the given property.  In
4132     * general, valid values range from 0 up to some maximum.  There
4133     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4134     * non-zero value BASIC_LATIN.getID().  (2.)
4135     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4136     * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4137     * are mask values produced by left-shifting 1 by
4138     * UCharacter.getType().  This allows grouped categories such as
4139     * [:L:] to be represented.  Mask values are non-contiguous.
4140     *
4141     * @param nameChoice UProperty.NameChoice selector for which name
4142     * to get.  All values have a long name.  Most have a short name,
4143     * but some do not.  Unicode allows for additional names; if
4144     * present these will be returned by UProperty.NameChoice.LONG + i,
4145     * where i=1, 2,...
4146     *
4147     * @return a name, or null if Unicode explicitly defines no name
4148     * ("n/a") for a given property/value/nameChoice.  If a given
4149     * nameChoice throws an exception, then all larger values of
4150     * nameChoice will throw an exception.  If null is returned for a
4151     * given nameChoice, then other nameChoice values may return
4152     * non-null results.
4153     *
4154     * @exception IllegalArgumentException thrown if property, value,
4155     * or nameChoice are invalid.
4156     *
4157     * @see UProperty
4158     * @see UProperty.NameChoice
4159     */
4160    public static String getPropertyValueName(int property,
4161            int value,
4162            int nameChoice)
4163    {
4164        if ((property == UProperty.CANONICAL_COMBINING_CLASS
4165                || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4166                || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4167                && value >= UCharacter.getIntPropertyMinValue(
4168                        UProperty.CANONICAL_COMBINING_CLASS)
4169                        && value <= UCharacter.getIntPropertyMaxValue(
4170                                UProperty.CANONICAL_COMBINING_CLASS)
4171                                && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4172            // this is hard coded for the valid cc
4173            // because PropertyValueAliases.txt does not contain all of them
4174            try {
4175                return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4176                        nameChoice);
4177            }
4178            catch (IllegalArgumentException e) {
4179                return null;
4180            }
4181        }
4182        return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4183    }
4184
4185    /**
4186     * <strong>[icu]</strong> Return the property value integer for a given value name, as
4187     * specified in the Unicode database file PropertyValueAliases.txt.
4188     * Short, long, and any other variants are recognized.
4189     *
4190     * Note: Some of the names in PropertyValueAliases.txt will only be
4191     * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4192     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4193     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4194     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4195     *
4196     * @param property UProperty selector constant.
4197     * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4198     * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4199     * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4200     * Only these properties can be enumerated.
4201     *
4202     * @param valueAlias the value name to be matched.  The name is
4203     * compared using "loose matching" as described in
4204     * PropertyValueAliases.txt.
4205     *
4206     * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4207     * values are mask values produced by left-shifting 1 by
4208     * UCharacter.getType().  This allows grouped categories such as
4209     * [:L:] to be represented.
4210     *
4211     * @see UProperty
4212     * @throws IllegalArgumentException if property is not a valid UProperty
4213     *         selector or valueAlias is not a value of this property
4214     */
4215    public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4216        int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4217        if (propEnum == UProperty.UNDEFINED) {
4218            throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4219        }
4220        return propEnum;
4221    }
4222
4223    /**
4224     * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4225     * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4226     * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4227     * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4228     * @deprecated This API is ICU internal only.
4229     * @hide original deprecated declaration
4230     * @hide draft / provisional / internal are hidden on Android
4231     */
4232    @Deprecated
4233    public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4234        return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4235    }
4236
4237
4238    /**
4239     * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4240     *
4241     * @param lead the lead char
4242     * @param trail the trail char
4243     * @return code point if surrogate characters are valid.
4244     * @exception IllegalArgumentException thrown when the code units do
4245     *            not form a valid code point
4246     */
4247    public static int getCodePoint(char lead, char trail)
4248    {
4249        if (Character.isSurrogatePair(lead, trail)) {
4250            return Character.toCodePoint(lead, trail);
4251        }
4252        throw new IllegalArgumentException("Illegal surrogate characters");
4253    }
4254
4255    /**
4256     * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
4257     *
4258     * @param char16 the BMP code point
4259     * @return code point if argument is a valid character.
4260     * @exception IllegalArgumentException thrown when char16 is not a valid
4261     *            code point
4262     */
4263    public static int getCodePoint(char char16)
4264    {
4265        if (UCharacter.isLegal(char16)) {
4266            return char16;
4267        }
4268        throw new IllegalArgumentException("Illegal codepoint");
4269    }
4270
4271    /**
4272     * Returns the uppercase version of the argument string.
4273     * Casing is dependent on the default locale and context-sensitive.
4274     * @param str source string to be performed on
4275     * @return uppercase version of the argument string
4276     */
4277    public static String toUpperCase(String str)
4278    {
4279        return toUpperCase(getDefaultCaseLocale(), str);
4280    }
4281
4282    /**
4283     * Returns the lowercase version of the argument string.
4284     * Casing is dependent on the default locale and context-sensitive
4285     * @param str source string to be performed on
4286     * @return lowercase version of the argument string
4287     */
4288    public static String toLowerCase(String str)
4289    {
4290        return toLowerCase(getDefaultCaseLocale(), str);
4291    }
4292
4293    /**
4294     * <p>Returns the titlecase version of the argument string.
4295     * <p>Position for titlecasing is determined by the argument break
4296     * iterator, hence the user can customize his break iterator for
4297     * a specialized titlecasing. In this case only the forward iteration
4298     * needs to be implemented.
4299     * If the break iterator passed in is null, the default Unicode algorithm
4300     * will be used to determine the titlecase positions.
4301     *
4302     * <p>Only positions returned by the break iterator will be title cased,
4303     * character in between the positions will all be in lower case.
4304     * <p>Casing is dependent on the default locale and context-sensitive
4305     * @param str source string to be performed on
4306     * @param breakiter break iterator to determine the positions in which
4307     *        the character should be title cased.
4308     * @return lowercase version of the argument string
4309     */
4310    public static String toTitleCase(String str, BreakIterator breakiter)
4311    {
4312        return toTitleCase(Locale.getDefault(), str, breakiter, 0);
4313    }
4314
4315    private static int getDefaultCaseLocale() {
4316        return UCaseProps.getCaseLocale(Locale.getDefault());
4317    }
4318
4319    private static int getCaseLocale(Locale locale) {
4320        if (locale == null) {
4321            locale = Locale.getDefault();
4322        }
4323        return UCaseProps.getCaseLocale(locale);
4324    }
4325
4326    private static int getCaseLocale(ULocale locale) {
4327        if (locale == null) {
4328            locale = ULocale.getDefault();
4329        }
4330        return UCaseProps.getCaseLocale(locale);
4331    }
4332
4333    private static String toLowerCase(int caseLocale, String str) {
4334        if (str.length() <= 100) {
4335            if (str.isEmpty()) {
4336                return str;
4337            }
4338            // Collect and apply only changes.
4339            // Good if no or few changes. Bad (slow) if many changes.
4340            Edits edits = new Edits();
4341            StringBuilder replacementChars = CaseMapImpl.toLower(
4342                    caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4343            return applyEdits(str, replacementChars, edits);
4344        } else {
4345            return CaseMapImpl.toLower(caseLocale, 0, str,
4346                    new StringBuilder(str.length()), null).toString();
4347        }
4348    }
4349
4350    private static String toUpperCase(int caseLocale, String str) {
4351        if (str.length() <= 100) {
4352            if (str.isEmpty()) {
4353                return str;
4354            }
4355            // Collect and apply only changes.
4356            // Good if no or few changes. Bad (slow) if many changes.
4357            Edits edits = new Edits();
4358            StringBuilder replacementChars = CaseMapImpl.toUpper(
4359                    caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4360            return applyEdits(str, replacementChars, edits);
4361        } else {
4362            return CaseMapImpl.toUpper(caseLocale, 0, str,
4363                    new StringBuilder(str.length()), null).toString();
4364        }
4365    }
4366
4367    private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
4368        if (str.length() <= 100) {
4369            if (str.isEmpty()) {
4370                return str;
4371            }
4372            // Collect and apply only changes.
4373            // Good if no or few changes. Bad (slow) if many changes.
4374            Edits edits = new Edits();
4375            StringBuilder replacementChars = CaseMapImpl.toTitle(
4376                    caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
4377                    new StringBuilder(), edits);
4378            return applyEdits(str, replacementChars, edits);
4379        } else {
4380            return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
4381                    new StringBuilder(str.length()), null).toString();
4382        }
4383    }
4384
4385    private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
4386        if (!edits.hasChanges()) {
4387            return str;
4388        }
4389        StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
4390        for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
4391            if (ei.hasChange()) {
4392                int i = ei.replacementIndex();
4393                result.append(replacementChars, i, i + ei.newLength());
4394            } else {
4395                int i = ei.sourceIndex();
4396                result.append(str, i, i + ei.oldLength());
4397            }
4398        }
4399        return result.toString();
4400    }
4401
4402    /**
4403     * Returns the uppercase version of the argument string.
4404     * Casing is dependent on the argument locale and context-sensitive.
4405     * @param locale which string is to be converted in
4406     * @param str source string to be performed on
4407     * @return uppercase version of the argument string
4408     */
4409    public static String toUpperCase(Locale locale, String str)
4410    {
4411        return toUpperCase(getCaseLocale(locale), str);
4412    }
4413
4414    /**
4415     * Returns the uppercase version of the argument string.
4416     * Casing is dependent on the argument locale and context-sensitive.
4417     * @param locale which string is to be converted in
4418     * @param str source string to be performed on
4419     * @return uppercase version of the argument string
4420     */
4421    public static String toUpperCase(ULocale locale, String str) {
4422        return toUpperCase(getCaseLocale(locale), str);
4423    }
4424
4425    /**
4426     * Returns the lowercase version of the argument string.
4427     * Casing is dependent on the argument locale and context-sensitive
4428     * @param locale which string is to be converted in
4429     * @param str source string to be performed on
4430     * @return lowercase version of the argument string
4431     */
4432    public static String toLowerCase(Locale locale, String str)
4433    {
4434        return toLowerCase(getCaseLocale(locale), str);
4435    }
4436
4437    /**
4438     * Returns the lowercase version of the argument string.
4439     * Casing is dependent on the argument locale and context-sensitive
4440     * @param locale which string is to be converted in
4441     * @param str source string to be performed on
4442     * @return lowercase version of the argument string
4443     */
4444    public static String toLowerCase(ULocale locale, String str) {
4445        return toLowerCase(getCaseLocale(locale), str);
4446    }
4447
4448    /**
4449     * <p>Returns the titlecase version of the argument string.
4450     * <p>Position for titlecasing is determined by the argument break
4451     * iterator, hence the user can customize his break iterator for
4452     * a specialized titlecasing. In this case only the forward iteration
4453     * needs to be implemented.
4454     * If the break iterator passed in is null, the default Unicode algorithm
4455     * will be used to determine the titlecase positions.
4456     *
4457     * <p>Only positions returned by the break iterator will be title cased,
4458     * character in between the positions will all be in lower case.
4459     * <p>Casing is dependent on the argument locale and context-sensitive
4460     * @param locale which string is to be converted in
4461     * @param str source string to be performed on
4462     * @param breakiter break iterator to determine the positions in which
4463     *        the character should be title cased.
4464     * @return lowercase version of the argument string
4465     */
4466    public static String toTitleCase(Locale locale, String str,
4467            BreakIterator breakiter)
4468    {
4469        return toTitleCase(locale, str, breakiter, 0);
4470    }
4471
4472    /**
4473     * <p>Returns the titlecase version of the argument string.
4474     * <p>Position for titlecasing is determined by the argument break
4475     * iterator, hence the user can customize his break iterator for
4476     * a specialized titlecasing. In this case only the forward iteration
4477     * needs to be implemented.
4478     * If the break iterator passed in is null, the default Unicode algorithm
4479     * will be used to determine the titlecase positions.
4480     *
4481     * <p>Only positions returned by the break iterator will be title cased,
4482     * character in between the positions will all be in lower case.
4483     * <p>Casing is dependent on the argument locale and context-sensitive
4484     * @param locale which string is to be converted in
4485     * @param str source string to be performed on
4486     * @param titleIter break iterator to determine the positions in which
4487     *        the character should be title cased.
4488     * @return lowercase version of the argument string
4489     */
4490    public static String toTitleCase(ULocale locale, String str,
4491            BreakIterator titleIter) {
4492        return toTitleCase(locale, str, titleIter, 0);
4493    }
4494
4495    /**
4496     * <p>Returns the titlecase version of the argument string.
4497     * <p>Position for titlecasing is determined by the argument break
4498     * iterator, hence the user can customize his break iterator for
4499     * a specialized titlecasing. In this case only the forward iteration
4500     * needs to be implemented.
4501     * If the break iterator passed in is null, the default Unicode algorithm
4502     * will be used to determine the titlecase positions.
4503     *
4504     * <p>Only positions returned by the break iterator will be title cased,
4505     * character in between the positions will all be in lower case.
4506     * <p>Casing is dependent on the argument locale and context-sensitive
4507     * @param locale which string is to be converted in
4508     * @param str source string to be performed on
4509     * @param titleIter break iterator to determine the positions in which
4510     *        the character should be title cased.
4511     * @param options bit set to modify the titlecasing operation
4512     * @return lowercase version of the argument string
4513     * @see #TITLECASE_NO_LOWERCASE
4514     * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4515     */
4516    public static String toTitleCase(ULocale locale, String str,
4517            BreakIterator titleIter, int options) {
4518        if(titleIter == null) {
4519            if (locale == null) {
4520                locale = ULocale.getDefault();
4521            }
4522            titleIter = BreakIterator.getWordInstance(locale);
4523        }
4524        titleIter.setText(str);
4525        return toTitleCase(getCaseLocale(locale), options, titleIter, str);
4526    }
4527
4528
4529    private static final int BREAK_MASK =
4530            (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4531            | (1<<UCharacterCategory.OTHER_LETTER)
4532            | (1<<UCharacterCategory.MODIFIER_LETTER);
4533
4534    /**
4535     * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
4536     * and sometimes has no effect at all; the original string is returned whenever casing
4537     * would not be appropriate for the first word (such as for CJK characters or initial numbers).
4538     * Initial non-letters are skipped in order to find the character to change.
4539     * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
4540     * <p>Examples:
4541     * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
4542     * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
4543     * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
4544     * <tr><td>49ers win!</td><td>49ers win!</td></tr>
4545     * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
4546     * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
4547     * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
4548     * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
4549     * </table>
4550     * @param locale the locale for accessing exceptional behavior (eg for tr).
4551     * @param str the source string to change
4552     * @return the modified string, or the original if no modifications were necessary.
4553     * @deprecated ICU internal only
4554     * @hide original deprecated declaration
4555     * @hide draft / provisional / internal are hidden on Android
4556     */
4557    @Deprecated
4558    public static String toTitleFirst(ULocale locale, String str) {
4559        int c = 0;
4560        for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) {
4561            c = UCharacter.codePointAt(str, i);
4562            int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK);
4563            if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK
4564                break;
4565            }
4566            if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) {
4567                continue;
4568            }
4569
4570            // we now have the first cased character
4571            // What we really want is something like:
4572            // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken);
4573            // That is, just give us the titlecased string, for the locale, at i and following,
4574            // and tell us how many characters are replaced.
4575            // The following won't work completely: it needs some more substantial changes to UCaseProps
4576
4577            String substring = str.substring(i, i+UCharacter.charCount(c));
4578            String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0);
4579
4580            // skip if no change
4581            if (titled.codePointAt(0) == c) {
4582                // Using 0 is safe, since any change in titling will not have first initial character
4583                break;
4584            }
4585            StringBuilder result = new StringBuilder(str.length()).append(str, 0, i);
4586            int startOfSuffix;
4587
4588            // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps.
4589
4590            if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') {
4591                result.append("IJ");
4592                startOfSuffix = 2;
4593            } else {
4594                result.append(titled);
4595                startOfSuffix = i + UCharacter.charCount(c);
4596            }
4597
4598            // add the remainder, and return
4599            return result.append(str, startOfSuffix, str.length()).toString();
4600        }
4601        return str; // no change
4602    }
4603
4604    /**
4605     * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
4606     * <p>Position for titlecasing is determined by the argument break
4607     * iterator, hence the user can customize his break iterator for
4608     * a specialized titlecasing. In this case only the forward iteration
4609     * needs to be implemented.
4610     * If the break iterator passed in is null, the default Unicode algorithm
4611     * will be used to determine the titlecase positions.
4612     *
4613     * <p>Only positions returned by the break iterator will be title cased,
4614     * character in between the positions will all be in lower case.
4615     * <p>Casing is dependent on the argument locale and context-sensitive
4616     * @param locale which string is to be converted in
4617     * @param str source string to be performed on
4618     * @param titleIter break iterator to determine the positions in which
4619     *        the character should be title cased.
4620     * @param options bit set to modify the titlecasing operation
4621     * @return lowercase version of the argument string
4622     * @see #TITLECASE_NO_LOWERCASE
4623     * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4624     */
4625    public static String toTitleCase(Locale locale, String str,
4626            BreakIterator titleIter,
4627            int options) {
4628        if(titleIter == null) {
4629            titleIter = BreakIterator.getWordInstance(locale);
4630        }
4631        titleIter.setText(str);
4632        return toTitleCase(getCaseLocale(locale), options, titleIter, str);
4633    }
4634
4635    /**
4636     * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4637     * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4638     * folding equivalent, the character itself is returned.
4639     *
4640     * <p>This function only returns the simple, single-code point case mapping.
4641     * Full case mappings should be used whenever possible because they produce
4642     * better results by working on whole strings.
4643     * They can map to a result string with a different length as appropriate.
4644     * Full case mappings are applied by the case mapping functions
4645     * that take String parameters rather than code points (int).
4646     * See also the User Guide chapter on C/POSIX migration:
4647     * http://www.icu-project.org/userguide/posix.html#case_mappings
4648     *
4649     * @param ch             the character to be converted
4650     * @param defaultmapping Indicates whether the default mappings defined in
4651     *                       CaseFolding.txt are to be used, otherwise the
4652     *                       mappings for dotted I and dotless i marked with
4653     *                       'T' in CaseFolding.txt are included.
4654     * @return               the case folding equivalent of the character, if
4655     *                       any; otherwise the character itself.
4656     * @see                  #foldCase(String, boolean)
4657     */
4658    public static int foldCase(int ch, boolean defaultmapping) {
4659        return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4660    }
4661
4662    /**
4663     * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4664     * UnicodeData.txt and CaseFolding.txt; if any character has no case
4665     * folding equivalent, the character itself is returned.
4666     * "Full", multiple-code point case folding mappings are returned here.
4667     * For "simple" single-code point mappings use the API
4668     * foldCase(int ch, boolean defaultmapping).
4669     * @param str            the String to be converted
4670     * @param defaultmapping Indicates whether the default mappings defined in
4671     *                       CaseFolding.txt are to be used, otherwise the
4672     *                       mappings for dotted I and dotless i marked with
4673     *                       'T' in CaseFolding.txt are included.
4674     * @return               the case folding equivalent of the character, if
4675     *                       any; otherwise the character itself.
4676     * @see                  #foldCase(int, boolean)
4677     */
4678    public static String foldCase(String str, boolean defaultmapping) {
4679        return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4680    }
4681
    /**
     * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
     * CaseFolding.txt.
     * <p>This is the option that the boolean <code>foldCase</code> overloads select
     * when their <code>defaultmapping</code> argument is true.
     */
    public static final int FOLD_CASE_DEFAULT    =      0x0000;
    /**
     * <strong>[icu]</strong> Option value for case folding:
     * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
     * and dotless i appropriately for Turkic languages (tr, az).
     * <p>This is the option that the boolean <code>foldCase</code> overloads select
     * when their <code>defaultmapping</code> argument is false.
     *
     * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
     * are to be included for default mappings and
     * excluded for the Turkic-specific mappings.
     *
     * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
     * are to be excluded for default mappings and
     * included for the Turkic-specific mappings.
     */
    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4701
4702    /**
4703     * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4704     * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4705     * folding equivalent, the character itself is returned.
4706     *
4707     * <p>This function only returns the simple, single-code point case mapping.
4708     * Full case mappings should be used whenever possible because they produce
4709     * better results by working on whole strings.
4710     * They can map to a result string with a different length as appropriate.
4711     * Full case mappings are applied by the case mapping functions
4712     * that take String parameters rather than code points (int).
4713     * See also the User Guide chapter on C/POSIX migration:
4714     * http://www.icu-project.org/userguide/posix.html#case_mappings
4715     *
4716     * @param ch the character to be converted
4717     * @param options A bit set for special processing. Currently the recognised options
4718     * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4719     * @return the case folding equivalent of the character, if any; otherwise the
4720     * character itself.
4721     * @see #foldCase(String, boolean)
4722     */
4723    public static int foldCase(int ch, int options) {
4724        return UCaseProps.INSTANCE.fold(ch, options);
4725    }
4726
4727    /**
4728     * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4729     * UnicodeData.txt and CaseFolding.txt; if any character has no case
4730     * folding equivalent, the character itself is returned.
4731     * "Full", multiple-code point case folding mappings are returned here.
4732     * For "simple" single-code point mappings use the API
4733     * foldCase(int ch, boolean defaultmapping).
4734     * @param str the String to be converted
4735     * @param options A bit set for special processing. Currently the recognised options
4736     *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4737     * @return the case folding equivalent of the character, if any; otherwise the
4738     *         character itself.
4739     * @see #foldCase(int, boolean)
4740     */
4741    public static final String foldCase(String str, int options) {
4742        if (str.length() <= 100) {
4743            if (str.isEmpty()) {
4744                return str;
4745            }
4746            // Collect and apply only changes.
4747            // Good if no or few changes. Bad (slow) if many changes.
4748            Edits edits = new Edits();
4749            StringBuilder replacementChars = CaseMapImpl.fold(
4750                    options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
4751            return applyEdits(str, replacementChars, edits);
4752        } else {
4753            return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
4754        }
4755    }
4756
4757    /**
4758     * <strong>[icu]</strong> Returns the numeric value of a Han character.
4759     *
4760     * <p>This returns the value of Han 'numeric' code points,
4761     * including those for zero, ten, hundred, thousand, ten thousand,
4762     * and hundred million.
4763     * This includes both the standard and 'checkwriting'
4764     * characters, the 'big circle' zero character, and the standard
4765     * zero character.
4766     *
4767     * <p>Note: The Unicode Standard has numeric values for more
4768     * Han characters recognized by this method
4769     * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
4770     * and a {@link android.icu.text.NumberFormat} can be used with
4771     * a Chinese {@link android.icu.text.NumberingSystem}.
4772     *
4773     * @param ch code point to query
4774     * @return value if it is a Han 'numeric character,' otherwise return -1.
4775     */
4776    public static int getHanNumericValue(int ch)
4777    {
4778        switch(ch)
4779        {
4780        case IDEOGRAPHIC_NUMBER_ZERO_ :
4781        case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4782            return 0; // Han Zero
4783        case CJK_IDEOGRAPH_FIRST_ :
4784        case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4785            return 1; // Han One
4786        case CJK_IDEOGRAPH_SECOND_ :
4787        case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4788            return 2; // Han Two
4789        case CJK_IDEOGRAPH_THIRD_ :
4790        case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4791            return 3; // Han Three
4792        case CJK_IDEOGRAPH_FOURTH_ :
4793        case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4794            return 4; // Han Four
4795        case CJK_IDEOGRAPH_FIFTH_ :
4796        case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4797            return 5; // Han Five
4798        case CJK_IDEOGRAPH_SIXTH_ :
4799        case CJK_IDEOGRAPH_COMPLEX_SIX_ :
4800            return 6; // Han Six
4801        case CJK_IDEOGRAPH_SEVENTH_ :
4802        case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
4803            return 7; // Han Seven
4804        case CJK_IDEOGRAPH_EIGHTH_ :
4805        case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
4806            return 8; // Han Eight
4807        case CJK_IDEOGRAPH_NINETH_ :
4808        case CJK_IDEOGRAPH_COMPLEX_NINE_ :
4809            return 9; // Han Nine
4810        case CJK_IDEOGRAPH_TEN_ :
4811        case CJK_IDEOGRAPH_COMPLEX_TEN_ :
4812            return 10;
4813        case CJK_IDEOGRAPH_HUNDRED_ :
4814        case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
4815            return 100;
4816        case CJK_IDEOGRAPH_THOUSAND_ :
4817        case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
4818            return 1000;
4819        case CJK_IDEOGRAPH_TEN_THOUSAND_ :
4820            return 10000;
4821        case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
4822            return 100000000;
4823        }
4824        return -1; // no value
4825    }
4826
4827    /**
4828     * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
4829     * <p>Example of use:<br>
4830     * <pre>
4831     * RangeValueIterator iterator = UCharacter.getTypeIterator();
4832     * RangeValueIterator.Element element = new RangeValueIterator.Element();
4833     * while (iterator.next(element)) {
4834     *     System.out.println("Codepoint \\u" +
4835     *                        Integer.toHexString(element.start) +
4836     *                        " to codepoint \\u" +
4837     *                        Integer.toHexString(element.limit - 1) +
4838     *                        " has the character type " +
4839     *                        element.value);
4840     * }
4841     * </pre>
4842     * @return an iterator
4843     */
4844    public static RangeValueIterator getTypeIterator()
4845    {
4846        return new UCharacterTypeIterator();
4847    }
4848
4849    private static final class UCharacterTypeIterator implements RangeValueIterator {
4850        UCharacterTypeIterator() {
4851            reset();
4852        }
4853
4854        // implements RangeValueIterator
4855        @Override
4856        public boolean next(Element element) {
4857            if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
4858                element.start=range.startCodePoint;
4859                element.limit=range.endCodePoint+1;
4860                element.value=range.value;
4861                return true;
4862            } else {
4863                return false;
4864            }
4865        }
4866
4867        // implements RangeValueIterator
4868        @Override
4869        public void reset() {
4870            trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
4871        }
4872
4873        private Iterator<Trie2.Range> trieIterator;
4874        private Trie2.Range range;
4875
4876        private static final class MaskType implements Trie2.ValueMapper {
4877            // Extracts the general category ("character type") from the trie value.
4878            @Override
4879            public int map(int value) {
4880                return value & UCharacterProperty.TYPE_MASK;
4881            }
4882        }
4883        private static final MaskType MASK_TYPE=new MaskType();
4884    }
4885
4886    /**
4887     * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4888     * <p>This API only gets the iterator for the modern, most up-to-date
4889     * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
4890     * for extended names use getExtendedNameIterator().
4891     * <p>Example of use:<br>
4892     * <pre>
4893     * ValueIterator iterator = UCharacter.getNameIterator();
4894     * ValueIterator.Element element = new ValueIterator.Element();
4895     * while (iterator.next(element)) {
4896     *     System.out.println("Codepoint \\u" +
4897     *                        Integer.toHexString(element.codepoint) +
4898     *                        " has the name " + (String)element.value);
4899     * }
4900     * </pre>
4901     * <p>The maximal range which the name iterator iterates is from
4902     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
4903     * @return an iterator
4904     */
4905    public static ValueIterator getNameIterator(){
4906        return new UCharacterNameIterator(UCharacterName.INSTANCE,
4907                UCharacterNameChoice.UNICODE_CHAR_NAME);
4908    }
4909
4910    /**
4911     * <strong>[icu]</strong> Returns an empty iterator.
4912     * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
4913     * @return an empty iterator
4914     * @deprecated ICU 49
4915     * @see #getName1_0(int)
4916     * @hide original deprecated declaration
4917     */
4918    @Deprecated
4919    public static ValueIterator getName1_0Iterator(){
4920        return new DummyValueIterator();
4921    }
4922
4923    private static final class DummyValueIterator implements ValueIterator {
4924        @Override
4925        public boolean next(Element element) { return false; }
4926        @Override
4927        public void reset() {}
4928        @Override
4929        public void setRange(int start, int limit) {}
4930    }
4931
4932    /**
4933     * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
4934     * <p>This API only gets the iterator for the extended names.
4935     * For modern, most up-to-date Unicode names use getNameIterator() or
4936     * for older 1.0 Unicode names use get1_0NameIterator().
4937     * <p>Example of use:<br>
4938     * <pre>
4939     * ValueIterator iterator = UCharacter.getExtendedNameIterator();
4940     * ValueIterator.Element element = new ValueIterator.Element();
4941     * while (iterator.next(element)) {
4942     *     System.out.println("Codepoint \\u" +
4943     *                        Integer.toHexString(element.codepoint) +
4944     *                        " has the name " + (String)element.value);
4945     * }
4946     * </pre>
4947     * <p>The maximal range which the name iterator iterates is from
4948     * @return an iterator
4949     */
4950    public static ValueIterator getExtendedNameIterator(){
4951        return new UCharacterNameIterator(UCharacterName.INSTANCE,
4952                UCharacterNameChoice.EXTENDED_CHAR_NAME);
4953    }
4954
4955    /**
4956     * <strong>[icu]</strong> Returns the "age" of the code point.
4957     * <p>The "age" is the Unicode version when the code point was first
4958     * designated (as a non-character or for Private Use) or assigned a
4959     * character.
4960     * <p>This can be useful to avoid emitting code points to receiving
4961     * processes that do not accept newer characters.
4962     * <p>The data is from the UCD file DerivedAge.txt.
4963     * @param ch The code point.
4964     * @return the Unicode version number
4965     */
4966    public static VersionInfo getAge(int ch)
4967    {
4968        if (ch < MIN_VALUE || ch > MAX_VALUE) {
4969            throw new IllegalArgumentException("Codepoint out of bounds");
4970        }
4971        return UCharacterProperty.INSTANCE.getAge(ch);
4972    }
4973
4974    /**
4975     * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point.
4976     * <p>Unicode, especially in version 3.2, defines many more properties
4977     * than the original set in UnicodeData.txt.
4978     * <p>This API is intended to reflect Unicode properties as defined in
4979     * the Unicode Character Database (UCD) and Unicode Technical Reports
4980     * (UTR).
4981     * <p>For details about the properties see
4982     * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
4983     * <p>For names of Unicode properties see the UCD file
4984     * PropertyAliases.txt.
4985     * <p>This API does not check the validity of the codepoint.
4986     * <p>Important: If ICU is built with UCD files from Unicode versions
4987     * below 3.2, then properties marked with "new" are not or
4988     * not fully available.
4989     * @param ch code point to test.
4990     * @param property selector constant from android.icu.lang.UProperty,
4991     *        identifies which binary property to check.
4992     * @return true or false according to the binary Unicode property value
4993     *         for ch. Also false if property is out of bounds or if the
4994     *         Unicode version does not have data for the property at all, or
4995     *         not for this code point.
4996     * @see android.icu.lang.UProperty
4997     */
4998    public static boolean hasBinaryProperty(int ch, int property)
4999    {
5000        return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5001    }
5002
5003    /**
5004     * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
5005     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5006     * <p>Different from UCharacter.isLetter(ch)!
5007     * @param ch codepoint to be tested
5008     */
5009    public static boolean isUAlphabetic(int ch)
5010    {
5011        return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5012    }
5013
5014    /**
5015     * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
5016     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5017     * <p>This is different from UCharacter.isLowerCase(ch)!
5018     * @param ch codepoint to be tested
5019     */
5020    public static boolean isULowercase(int ch)
5021    {
5022        return hasBinaryProperty(ch, UProperty.LOWERCASE);
5023    }
5024
5025    /**
5026     * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
5027     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5028     * <p>This is different from UCharacter.isUpperCase(ch)!
5029     * @param ch codepoint to be tested
5030     */
5031    public static boolean isUUppercase(int ch)
5032    {
5033        return hasBinaryProperty(ch, UProperty.UPPERCASE);
5034    }
5035
5036    /**
5037     * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
5038     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5039     * <p>This is different from both UCharacter.isSpace(ch) and
5040     * UCharacter.isWhitespace(ch)!
5041     * @param ch codepoint to be tested
5042     */
5043    public static boolean isUWhiteSpace(int ch)
5044    {
5045        return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5046    }
5047
5048    /**
5049     * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point.
5050     * Also returns binary and mask property values.
5051     * <p>Unicode, especially in version 3.2, defines many more properties than
5052     * the original set in UnicodeData.txt.
5053     * <p>The properties APIs are intended to reflect Unicode properties as
5054     * defined in the Unicode Character Database (UCD) and Unicode Technical
5055     * Reports (UTR). For details about the properties see
5056     * http://www.unicode.org/.
5057     * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5058     *
5059     * <pre>
5060     * Sample usage:
5061     * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5062     * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5063     * boolean b = (ideo == 1) ? true : false;
5064     * </pre>
5065     * @param ch code point to test.
5066     * @param type UProperty selector constant, identifies which binary
5067     *        property to check. Must be
5068     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5069     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5070     *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5071     * @return numeric value that is directly the property value or,
5072     *         for enumerated properties, corresponds to the numeric value of
5073     *         the enumerated constant of the respective property value
5074     *         enumeration type (cast to enum type if necessary).
5075     *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5076     *         Returns a bit-mask for mask properties.
5077     *         Returns 0 if 'type' is out of bounds or if the Unicode version
5078     *         does not have data for the property at all, or not for this code
5079     *         point.
5080     * @see UProperty
5081     * @see #hasBinaryProperty
5082     * @see #getIntPropertyMinValue
5083     * @see #getIntPropertyMaxValue
5084     * @see #getUnicodeVersion
5085     */
5086    public static int getIntPropertyValue(int ch, int type)
5087    {
5088        return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5089    }
5090    /**
5091     * <strong>[icu]</strong> Returns a string version of the property value.
5092     * @param propertyEnum The property enum value.
5093     * @param codepoint The codepoint value.
5094     * @param nameChoice The choice of the name.
5095     * @return value as string
5096     * @deprecated This API is ICU internal only.
5097     * @hide original deprecated declaration
5098     * @hide draft / provisional / internal are hidden on Android
5099     */
5100    @Deprecated
5101    ///CLOVER:OFF
5102    public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5103        if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5104                (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5105            return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5106                    nameChoice);
5107        }
5108        if (propertyEnum == UProperty.NUMERIC_VALUE) {
5109            return String.valueOf(getUnicodeNumericValue(codepoint));
5110        }
5111        // otherwise must be string property
5112        switch (propertyEnum) {
5113        case UProperty.AGE: return getAge(codepoint).toString();
5114        case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5115        case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5116        case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5117        case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5118        case UProperty.NAME: return getName(codepoint);
5119        case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5120        case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5121        case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5122        case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5123        case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5124        case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5125        case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5126        }
5127        throw new IllegalArgumentException("Illegal Property Enum");
5128    }
5129    ///CLOVER:ON
5130
5131    /**
5132     * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
5133     * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5134     * to allocate arrays of android.icu.text.UnicodeSet or similar.
5135     * @param type UProperty selector constant, identifies which binary
5136     *        property to check. Must be
5137     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5138     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5139     * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5140     *         for a Unicode property. 0 if the property
5141     *         selector 'type' is out of range.
5142     * @see UProperty
5143     * @see #hasBinaryProperty
5144     * @see #getUnicodeVersion
5145     * @see #getIntPropertyMaxValue
5146     * @see #getIntPropertyValue
5147     */
5148    public static int getIntPropertyMinValue(int type){
5149
5150        return 0; // undefined; and: all other properties have a minimum value of 0
5151    }
5152
5153
5154    /**
5155     * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
5156     * Can be used together with UCharacter.getIntPropertyMinValue(int)
5157     * to allocate arrays of android.icu.text.UnicodeSet or similar.
5158     * Examples for min/max values (for Unicode 3.2):
5159     * <ul>
5160     * <li> UProperty.BIDI_CLASS:    0/18
5161     * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5162     * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5163     * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5164     * </ul>
5165     * For undefined UProperty constant values, min/max values will be 0/-1.
5166     * @param type UProperty selector constant, identifies which binary
5167     *        property to check. Must be
5168     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5169     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5170     * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5171     *         property. &lt;= 0 if the property selector 'type' is out of range.
5172     * @see UProperty
5173     * @see #hasBinaryProperty
5174     * @see #getUnicodeVersion
5175     * @see #getIntPropertyMaxValue
5176     * @see #getIntPropertyValue
5177     */
5178    public static int getIntPropertyMaxValue(int type)
5179    {
5180        return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5181    }
5182
5183    /**
5184     * Provide the java.lang.Character forDigit API, for convenience.
5185     */
5186    public static char forDigit(int digit, int radix) {
5187        return java.lang.Character.forDigit(digit, radix);
5188    }
5189
5190    // JDK 1.5 API coverage
5191
    /**
     * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}:
     * the lowest high (lead) surrogate code unit.
     */
    public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;

    /**
     * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}:
     * the highest high (lead) surrogate code unit.
     */
    public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;

    /**
     * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}:
     * the lowest low (trail) surrogate code unit.
     */
    public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}:
     * the highest low (trail) surrogate code unit.
     */
    public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;

    /**
     * Constant U+D800, same as {@link Character#MIN_SURROGATE}:
     * the lowest surrogate code unit of either kind.
     */
    public static final char MIN_SURROGATE = Character.MIN_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}:
     * the highest surrogate code unit of either kind.
     */
    public static final char MAX_SURROGATE = Character.MAX_SURROGATE;

    /**
     * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}:
     * the lowest supplementary code point.
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}:
     * the highest code point.
     */
    public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

    /**
     * Constant U+0000, same as {@link Character#MIN_CODE_POINT}:
     * the lowest code point.
     */
    public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5236
5237    /**
5238     * Equivalent to {@link Character#isValidCodePoint}.
5239     *
5240     * @param cp the code point to check
5241     * @return true if cp is a valid code point
5242     */
5243    public static final boolean isValidCodePoint(int cp) {
5244        return cp >= 0 && cp <= MAX_CODE_POINT;
5245    }
5246
5247    /**
5248     * Same as {@link Character#isSupplementaryCodePoint}.
5249     *
5250     * @param cp the code point to check
5251     * @return true if cp is a supplementary code point
5252     */
5253    public static final boolean isSupplementaryCodePoint(int cp) {
5254        return Character.isSupplementaryCodePoint(cp);
5255    }
5256
5257    /**
5258     * Same as {@link Character#isHighSurrogate}.
5259     *
5260     * @param ch the char to check
5261     * @return true if ch is a high (lead) surrogate
5262     */
5263    public static boolean isHighSurrogate(char ch) {
5264        return Character.isHighSurrogate(ch);
5265    }
5266
5267    /**
5268     * Same as {@link Character#isLowSurrogate}.
5269     *
5270     * @param ch the char to check
5271     * @return true if ch is a low (trail) surrogate
5272     */
5273    public static boolean isLowSurrogate(char ch) {
5274        return Character.isLowSurrogate(ch);
5275    }
5276
5277    /**
5278     * Same as {@link Character#isSurrogatePair}.
5279     *
5280     * @param high the high (lead) char
5281     * @param low the low (trail) char
5282     * @return true if high, low form a surrogate pair
5283     */
5284    public static final boolean isSurrogatePair(char high, char low) {
5285        return Character.isSurrogatePair(high, low);
5286    }
5287
5288    /**
5289     * Same as {@link Character#charCount}.
5290     * Returns the number of chars needed to represent the code point (1 or 2).
5291     * This does not check the code point for validity.
5292     *
5293     * @param cp the code point to check
5294     * @return the number of chars needed to represent the code point
5295     */
5296    public static int charCount(int cp) {
5297        return Character.charCount(cp);
5298    }
5299
5300    /**
5301     * Same as {@link Character#toCodePoint}.
5302     * Returns the code point represented by the two surrogate code units.
5303     * This does not check the surrogate pair for validity.
5304     *
5305     * @param high the high (lead) surrogate
5306     * @param low the low (trail) surrogate
5307     * @return the code point formed by the surrogate pair
5308     */
5309    public static final int toCodePoint(char high, char low) {
5310        return Character.toCodePoint(high, low);
5311    }
5312
5313    /**
5314     * Same as {@link Character#codePointAt(CharSequence, int)}.
5315     * Returns the code point at index.
5316     * This examines only the characters at index and index+1.
5317     *
5318     * @param seq the characters to check
5319     * @param index the index of the first or only char forming the code point
5320     * @return the code point at the index
5321     */
5322    public static final int codePointAt(CharSequence seq, int index) {
5323        char c1 = seq.charAt(index++);
5324        if (isHighSurrogate(c1)) {
5325            if (index < seq.length()) {
5326                char c2 = seq.charAt(index);
5327                if (isLowSurrogate(c2)) {
5328                    return toCodePoint(c1, c2);
5329                }
5330            }
5331        }
5332        return c1;
5333    }
5334
5335    /**
5336     * Same as {@link Character#codePointAt(char[], int)}.
5337     * Returns the code point at index.
5338     * This examines only the characters at index and index+1.
5339     *
5340     * @param text the characters to check
5341     * @param index the index of the first or only char forming the code point
5342     * @return the code point at the index
5343     */
5344    public static final int codePointAt(char[] text, int index) {
5345        char c1 = text[index++];
5346        if (isHighSurrogate(c1)) {
5347            if (index < text.length) {
5348                char c2 = text[index];
5349                if (isLowSurrogate(c2)) {
5350                    return toCodePoint(c1, c2);
5351                }
5352            }
5353        }
5354        return c1;
5355    }
5356
5357    /**
5358     * Same as {@link Character#codePointAt(char[], int, int)}.
5359     * Returns the code point at index.
5360     * This examines only the characters at index and index+1.
5361     *
5362     * @param text the characters to check
5363     * @param index the index of the first or only char forming the code point
5364     * @param limit the limit of the valid text
5365     * @return the code point at the index
5366     */
5367    public static final int codePointAt(char[] text, int index, int limit) {
5368        if (index >= limit || limit > text.length) {
5369            throw new IndexOutOfBoundsException();
5370        }
5371        char c1 = text[index++];
5372        if (isHighSurrogate(c1)) {
5373            if (index < limit) {
5374                char c2 = text[index];
5375                if (isLowSurrogate(c2)) {
5376                    return toCodePoint(c1, c2);
5377                }
5378            }
5379        }
5380        return c1;
5381    }
5382
5383    /**
5384     * Same as {@link Character#codePointBefore(CharSequence, int)}.
5385     * Return the code point before index.
5386     * This examines only the characters at index-1 and index-2.
5387     *
5388     * @param seq the characters to check
5389     * @param index the index after the last or only char forming the code point
5390     * @return the code point before the index
5391     */
5392    public static final int codePointBefore(CharSequence seq, int index) {
5393        char c2 = seq.charAt(--index);
5394        if (isLowSurrogate(c2)) {
5395            if (index > 0) {
5396                char c1 = seq.charAt(--index);
5397                if (isHighSurrogate(c1)) {
5398                    return toCodePoint(c1, c2);
5399                }
5400            }
5401        }
5402        return c2;
5403    }
5404
5405    /**
5406     * Same as {@link Character#codePointBefore(char[], int)}.
5407     * Returns the code point before index.
5408     * This examines only the characters at index-1 and index-2.
5409     *
5410     * @param text the characters to check
5411     * @param index the index after the last or only char forming the code point
5412     * @return the code point before the index
5413     */
5414    public static final int codePointBefore(char[] text, int index) {
5415        char c2 = text[--index];
5416        if (isLowSurrogate(c2)) {
5417            if (index > 0) {
5418                char c1 = text[--index];
5419                if (isHighSurrogate(c1)) {
5420                    return toCodePoint(c1, c2);
5421                }
5422            }
5423        }
5424        return c2;
5425    }
5426
5427    /**
5428     * Same as {@link Character#codePointBefore(char[], int, int)}.
5429     * Return the code point before index.
5430     * This examines only the characters at index-1 and index-2.
5431     *
5432     * @param text the characters to check
5433     * @param index the index after the last or only char forming the code point
5434     * @param limit the start of the valid text
5435     * @return the code point before the index
5436     */
5437    public static final int codePointBefore(char[] text, int index, int limit) {
5438        if (index <= limit || limit < 0) {
5439            throw new IndexOutOfBoundsException();
5440        }
5441        char c2 = text[--index];
5442        if (isLowSurrogate(c2)) {
5443            if (index > limit) {
5444                char c1 = text[--index];
5445                if (isHighSurrogate(c1)) {
5446                    return toCodePoint(c1, c2);
5447                }
5448            }
5449        }
5450        return c2;
5451    }
5452
5453    /**
5454     * Same as {@link Character#toChars(int, char[], int)}.
5455     * Writes the chars representing the
5456     * code point into the destination at the given index.
5457     *
5458     * @param cp the code point to convert
5459     * @param dst the destination array into which to put the char(s) representing the code point
5460     * @param dstIndex the index at which to put the first (or only) char
5461     * @return the count of the number of chars written (1 or 2)
5462     * @throws IllegalArgumentException if cp is not a valid code point
5463     */
5464    public static final int toChars(int cp, char[] dst, int dstIndex) {
5465        return Character.toChars(cp, dst, dstIndex);
5466    }
5467
5468    /**
5469     * Same as {@link Character#toChars(int)}.
5470     * Returns a char array representing the code point.
5471     *
5472     * @param cp the code point to convert
5473     * @return an array containing the char(s) representing the code point
5474     * @throws IllegalArgumentException if cp is not a valid code point
5475     */
5476    public static final char[] toChars(int cp) {
5477        return Character.toChars(cp);
5478    }
5479
5480    /**
5481     * Equivalent to the {@link Character#getDirectionality(char)} method, for
5482     * convenience. Returns a byte representing the directionality of the
5483     * character.
5484     *
5485     * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
5486     * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
5487     *
5488     * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
5489     * UCharacterDirection} and its interface {@link
5490     * UCharacterEnums.ECharacterDirection} since the values are different from the ones
5491     * defined by <code>java.lang.Character</code>.
5492     * @param cp the code point to check
5493     * @return the directionality of the code point
5494     * @see #getDirection
5495     */
5496    public static byte getDirectionality(int cp)
5497    {
5498        return (byte)getDirection(cp);
5499    }
5500
5501    /**
5502     * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
5503     * method, for convenience.  Counts the number of code points in the range
5504     * of text.
5505     * @param text the characters to check
5506     * @param start the start of the range
5507     * @param limit the limit of the range
5508     * @return the number of code points in the range
5509     */
5510    public static int codePointCount(CharSequence text, int start, int limit) {
5511        if (start < 0 || limit < start || limit > text.length()) {
5512            throw new IndexOutOfBoundsException("start (" + start +
5513                    ") or limit (" + limit +
5514                    ") invalid or out of range 0, " + text.length());
5515        }
5516
5517        int len = limit - start;
5518        while (limit > start) {
5519            char ch = text.charAt(--limit);
5520            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5521                ch = text.charAt(--limit);
5522                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5523                    --len;
5524                    break;
5525                }
5526            }
5527        }
5528        return len;
5529    }
5530
5531    /**
5532     * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
5533     * convenience. Counts the number of code points in the range of text.
5534     * @param text the characters to check
5535     * @param start the start of the range
5536     * @param limit the limit of the range
5537     * @return the number of code points in the range
5538     */
5539    public static int codePointCount(char[] text, int start, int limit) {
5540        if (start < 0 || limit < start || limit > text.length) {
5541            throw new IndexOutOfBoundsException("start (" + start +
5542                    ") or limit (" + limit +
5543                    ") invalid or out of range 0, " + text.length);
5544        }
5545
5546        int len = limit - start;
5547        while (limit > start) {
5548            char ch = text[--limit];
5549            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5550                ch = text[--limit];
5551                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5552                    --len;
5553                    break;
5554                }
5555            }
5556        }
5557        return len;
5558    }
5559
5560    /**
5561     * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
5562     * method, for convenience.  Adjusts the char index by a code point offset.
5563     * @param text the characters to check
5564     * @param index the index to adjust
5565     * @param codePointOffset the number of code points by which to offset the index
5566     * @return the adjusted index
5567     */
5568    public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
5569        if (index < 0 || index > text.length()) {
5570            throw new IndexOutOfBoundsException("index ( " + index +
5571                    ") out of range 0, " + text.length());
5572        }
5573
5574        if (codePointOffset < 0) {
5575            while (++codePointOffset <= 0) {
5576                char ch = text.charAt(--index);
5577                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5578                    ch = text.charAt(--index);
5579                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5580                        if (++codePointOffset > 0) {
5581                            return index+1;
5582                        }
5583                    }
5584                }
5585            }
5586        } else {
5587            int limit = text.length();
5588            while (--codePointOffset >= 0) {
5589                char ch = text.charAt(index++);
5590                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5591                    ch = text.charAt(index++);
5592                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5593                        if (--codePointOffset < 0) {
5594                            return index-1;
5595                        }
5596                    }
5597                }
5598            }
5599        }
5600
5601        return index;
5602    }
5603
5604    /**
5605     * Equivalent to the
5606     * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
5607     * method, for convenience.  Adjusts the char index by a code point offset.
5608     * @param text the characters to check
5609     * @param start the start of the range to check
5610     * @param count the length of the range to check
5611     * @param index the index to adjust
5612     * @param codePointOffset the number of code points by which to offset the index
5613     * @return the adjusted index
5614     */
5615    public static int offsetByCodePoints(char[] text, int start, int count, int index,
5616            int codePointOffset) {
5617        int limit = start + count;
5618        if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
5619            throw new IndexOutOfBoundsException("index ( " + index +
5620                    ") out of range " + start +
5621                    ", " + limit +
5622                    " in array 0, " + text.length);
5623        }
5624
5625        if (codePointOffset < 0) {
5626            while (++codePointOffset <= 0) {
5627                char ch = text[--index];
5628                if (index < start) {
5629                    throw new IndexOutOfBoundsException("index ( " + index +
5630                            ") < start (" + start +
5631                            ")");
5632                }
5633                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
5634                    ch = text[--index];
5635                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5636                        if (++codePointOffset > 0) {
5637                            return index+1;
5638                        }
5639                    }
5640                }
5641            }
5642        } else {
5643            while (--codePointOffset >= 0) {
5644                char ch = text[index++];
5645                if (index > limit) {
5646                    throw new IndexOutOfBoundsException("index ( " + index +
5647                            ") > limit (" + limit +
5648                            ")");
5649                }
5650                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5651                    ch = text[index++];
5652                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5653                        if (--codePointOffset < 0) {
5654                            return index-1;
5655                        }
5656                    }
5657                }
5658            }
5659        }
5660
5661        return index;
5662    }
5663
5664    // private variables -------------------------------------------------
5665
5666    /**
5667     * To get the last character out from a data type
5668     */
5669    private static final int LAST_CHAR_MASK_ = 0xFFFF;
5670
5671    //    /**
5672    //     * To get the last byte out from a data type
5673    //     */
5674    //    private static final int LAST_BYTE_MASK_ = 0xFF;
5675    //
5676    //    /**
5677    //     * Shift 16 bits
5678    //     */
5679    //    private static final int SHIFT_16_ = 16;
5680    //
5681    //    /**
5682    //     * Shift 24 bits
5683    //     */
5684    //    private static final int SHIFT_24_ = 24;
5685    //
5686    //    /**
5687    //     * Decimal radix
5688    //     */
5689    //    private static final int DECIMAL_RADIX_ = 10;
5690
5691    /**
5692     * No break space code point
5693     */
5694    private static final int NO_BREAK_SPACE_ = 0xA0;
5695
5696    /**
5697     * Figure space code point
5698     */
5699    private static final int FIGURE_SPACE_ = 0x2007;
5700
5701    /**
5702     * Narrow no break space code point
5703     */
5704    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
5705
5706    /**
5707     * Ideographic number zero code point
5708     */
5709    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
5710
5711    /**
5712     * CJK Ideograph, First code point
5713     */
5714    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
5715
5716    /**
5717     * CJK Ideograph, Second code point
5718     */
5719    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
5720
5721    /**
5722     * CJK Ideograph, Third code point
5723     */
5724    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
5725
5726    /**
5727     * CJK Ideograph, Fourth code point
5728     */
5729    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
5730
5731    /**
5732     * CJK Ideograph, FIFTH code point
5733     */
5734    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
5735
5736    /**
5737     * CJK Ideograph, Sixth code point
5738     */
5739    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
5740
5741    /**
5742     * CJK Ideograph, Seventh code point
5743     */
5744    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
5745
5746    /**
5747     * CJK Ideograph, Eighth code point
5748     */
5749    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
5750
5751    /**
5752     * CJK Ideograph, Nineth code point
5753     */
5754    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
5755
5756    /**
5757     * Application Program command code point
5758     */
5759    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
5760
5761    /**
5762     * Unit separator code point
5763     */
5764    private static final int UNIT_SEPARATOR_ = 0x001F;
5765
5766    /**
5767     * Delete code point
5768     */
5769    private static final int DELETE_ = 0x007F;
5770
5771    /**
5772     * Han digit characters
5773     */
5774    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
5775    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
5776    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
5777    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
5778    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
5779    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
5780    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
5781    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
5782    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
5783    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
5784    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
5785    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
5786    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
5787    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
5788    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
5789    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
5790    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
5791    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
5792
5793    // private constructor -----------------------------------------------
5794    ///CLOVER:OFF
5795    /**
5796     * Private constructor to prevent instantiation
5797     */
5798    private UCharacter()
5799    {
5800    }
5801    ///CLOVER:ON
5802}
5803