Character.java revision 48819fe0b9130618a430ec52b3f8526c4c0a5f8a
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21import java.util.Arrays;
22
23/**
24 * The wrapper for the primitive type {@code char}. This class also provides a
25 * number of utility methods for working with characters.
26 *
27 * <p>Character data is kept up to date as Unicode evolves.
28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29 * the {@code Locale} documentation for details of the Unicode versions implemented by current
30 * and historical Android releases.
31 *
32 * <p>The Unicode specification, character tables, and other information are available at
33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34 *
35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39 * encoding and {@code char} pairs are used to represent code points in the
40 * supplementary range. A pair of {@code char} values that represent a
41 * supplementary character are made up of a <i>high surrogate</i> with a value
42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
43 * 0xDC00 to 0xDFFF.
44 * <p>
45 * On the Java platform a {@code char} value represents either a single BMP code
46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47 * is used to represent all Unicode code points.
48 *
49 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51 * grouped semantically to provide a convenient overview. This table is also useful in
52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53 * <span class="datatable">
54 * <style type="text/css">
55 * .datatable td { padding-right: 20px; }
56 * </style>
57 * <p><table>
58 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63 * <tr> <td><br></td> </tr>
64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69 * <tr> <td><br></td> </tr>
70 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73 * <tr> <td><br></td> </tr>
74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77 * <tr> <td><br></td> </tr>
78 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85 * <tr> <td><br></td> </tr>
86 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90 * <tr> <td><br></td> </tr>
91 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94 * </table>
95 * </span>
96 *
97 * @since 1.0
98 */
99@FindBugsSuppressWarnings("DM_NUMBER_CTOR")
100public final class Character implements Serializable, Comparable<Character> {
101    private static final long serialVersionUID = 3786198910865385080L;
102
    // The primitive char held by this wrapper object; declared final, so it is fixed at construction.
103    private final char value;
104
105    /**
106     * The minimum {@code Character} value, U+0000.
107     */
108    public static final char MIN_VALUE = '\u0000';
109
110    /**
111     * The maximum {@code Character} value, U+FFFF.
112     */
113    public static final char MAX_VALUE = '\uffff';
114
115    /**
116     * The minimum radix (2) used for conversions between characters and integers.
117     */
118    public static final int MIN_RADIX = 2;
119
120    /**
121     * The maximum radix (36) used for conversions between characters and integers.
122     */
123    public static final int MAX_RADIX = 36;
124
125    /**
126     * The {@link Class} object that represents the primitive type {@code char}.
127     */
128    @SuppressWarnings("unchecked")
129    public static final Class<Character> TYPE
130            = (Class<Character>) char[].class.getComponentType();
131    // Note: TYPE is obtained as the component type of char[] because it can't be set to
132    // "char.class": *that* expression is itself defined to be "java.lang.Character.TYPE".
133
134    /**
135     * Unicode category constant Cn (unassigned).
136     */
137    public static final byte UNASSIGNED = 0;
138
139    /**
140     * Unicode category constant Lu (uppercase letter).
141     */
142    public static final byte UPPERCASE_LETTER = 1;
143
144    /**
145     * Unicode category constant Ll (lowercase letter).
146     */
147    public static final byte LOWERCASE_LETTER = 2;
148
149    /**
150     * Unicode category constant Lt (titlecase letter).
151     */
152    public static final byte TITLECASE_LETTER = 3;
153
154    /**
155     * Unicode category constant Lm (modifier letter).
156     */
157    public static final byte MODIFIER_LETTER = 4;
158
159    /**
160     * Unicode category constant Lo (other letter).
161     */
162    public static final byte OTHER_LETTER = 5;
163
164    /**
165     * Unicode category constant Mn (non-spacing mark).
166     */
167    public static final byte NON_SPACING_MARK = 6;
168
169    /**
170     * Unicode category constant Me (enclosing mark).
171     */
172    public static final byte ENCLOSING_MARK = 7;
173
174    /**
175     * Unicode category constant Mc (combining spacing mark).
176     */
177    public static final byte COMBINING_SPACING_MARK = 8;
178
179    /**
180     * Unicode category constant Nd (decimal digit number).
181     */
182    public static final byte DECIMAL_DIGIT_NUMBER = 9;
183
184    /**
185     * Unicode category constant Nl (letter number).
186     */
187    public static final byte LETTER_NUMBER = 10;
188
189    /**
190     * Unicode category constant No (other number).
191     */
192    public static final byte OTHER_NUMBER = 11;
193
194    /**
195     * Unicode category constant Zs (space separator).
196     */
197    public static final byte SPACE_SEPARATOR = 12;
198
199    /**
200     * Unicode category constant Zl (line separator).
201     */
202    public static final byte LINE_SEPARATOR = 13;
203
204    /**
205     * Unicode category constant Zp (paragraph separator).
206     */
207    public static final byte PARAGRAPH_SEPARATOR = 14;
208
209    /**
210     * Unicode category constant Cc (control).
211     */
212    public static final byte CONTROL = 15;
213
214    /**
215     * Unicode category constant Cf (format).
216     */
217    public static final byte FORMAT = 16;
218
219    /**
220     * Unicode category constant Co (private use); note that no category constant has the value 17.
221     */
222    public static final byte PRIVATE_USE = 18;
223
224    /**
225     * Unicode category constant Cs (surrogate).
226     */
227    public static final byte SURROGATE = 19;
228
229    /**
230     * Unicode category constant Pd (dash punctuation).
231     */
232    public static final byte DASH_PUNCTUATION = 20;
233
234    /**
235     * Unicode category constant Ps (start punctuation).
236     */
237    public static final byte START_PUNCTUATION = 21;
238
239    /**
240     * Unicode category constant Pe (end punctuation).
241     */
242    public static final byte END_PUNCTUATION = 22;
243
244    /**
245     * Unicode category constant Pc (connector punctuation).
246     */
247    public static final byte CONNECTOR_PUNCTUATION = 23;
248
249    /**
250     * Unicode category constant Po (other punctuation).
251     */
252    public static final byte OTHER_PUNCTUATION = 24;
253
254    /**
255     * Unicode category constant Sm (math symbol).
256     */
257    public static final byte MATH_SYMBOL = 25;
258
259    /**
260     * Unicode category constant Sc (currency symbol).
261     */
262    public static final byte CURRENCY_SYMBOL = 26;
263
264    /**
265     * Unicode category constant Sk (modifier symbol).
266     */
267    public static final byte MODIFIER_SYMBOL = 27;
268
269    /**
270     * Unicode category constant So (other symbol).
271     */
272    public static final byte OTHER_SYMBOL = 28;
273
274    /**
275     * Unicode category constant Pi (initial quote punctuation).
276     *
277     * @since 1.4
278     */
279    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
280
281    /**
282     * Unicode category constant Pf (final quote punctuation).
283     *
284     * @since 1.4
285     */
286    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
287
288    /**
289     * Unicode bidirectional constant for an undefined directionality.
290     *
291     * @since 1.4
292     */
293    public static final byte DIRECTIONALITY_UNDEFINED = -1;
294
295    /**
296     * Unicode bidirectional constant L (left-to-right).
297     *
298     * @since 1.4
299     */
300    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
301
302    /**
303     * Unicode bidirectional constant R (right-to-left).
304     *
305     * @since 1.4
306     */
307    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
308
309    /**
310     * Unicode bidirectional constant AL (right-to-left Arabic).
311     *
312     * @since 1.4
313     */
314    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
315
316    /**
317     * Unicode bidirectional constant EN (European number).
318     *
319     * @since 1.4
320     */
321    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
322
323    /**
324     * Unicode bidirectional constant ES (European number separator).
325     *
326     * @since 1.4
327     */
328    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
329
330    /**
331     * Unicode bidirectional constant ET (European number terminator).
332     *
333     * @since 1.4
334     */
335    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
336
337    /**
338     * Unicode bidirectional constant AN (Arabic number).
339     *
340     * @since 1.4
341     */
342    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
343
344    /**
345     * Unicode bidirectional constant CS (common number separator).
346     *
347     * @since 1.4
348     */
349    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
350
351    /**
352     * Unicode bidirectional constant NSM (non-spacing mark).
353     *
354     * @since 1.4
355     */
356    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
357
358    /**
359     * Unicode bidirectional constant BN (boundary neutral).
360     *
361     * @since 1.4
362     */
363    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
364
365    /**
366     * Unicode bidirectional constant B (paragraph separator).
367     *
368     * @since 1.4
369     */
370    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
371
372    /**
373     * Unicode bidirectional constant S (segment separator).
374     *
375     * @since 1.4
376     */
377    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
378
379    /**
380     * Unicode bidirectional constant WS (whitespace).
381     *
382     * @since 1.4
383     */
384    public static final byte DIRECTIONALITY_WHITESPACE = 12;
385
386    /**
387     * Unicode bidirectional constant ON (other neutrals).
388     *
389     * @since 1.4
390     */
391    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
392
393    /**
394     * Unicode bidirectional constant LRE (left-to-right embedding).
395     *
396     * @since 1.4
397     */
398    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
399
400    /**
401     * Unicode bidirectional constant LRO (left-to-right override).
402     *
403     * @since 1.4
404     */
405    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
406
407    /**
408     * Unicode bidirectional constant RLE (right-to-left embedding).
409     *
410     * @since 1.4
411     */
412    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
413
414    /**
415     * Unicode bidirectional constant RLO (right-to-left override).
416     *
417     * @since 1.4
418     */
419    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
420
421    /**
422     * Unicode bidirectional constant PDF (pop directional format).
423     *
424     * @since 1.4
425     */
426    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
427
428    /**
429     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
430     * encoding, {@code '\uD800'}.
431     *
432     * @since 1.5
433     */
434    public static final char MIN_HIGH_SURROGATE = '\uD800';
435
436    /**
437     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
438     * encoding, {@code '\uDBFF'}.
439     *
440     * @since 1.5
441     */
442    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
443
444    /**
445     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
446     * encoding, {@code '\uDC00'}.
447     *
448     * @since 1.5
449     */
450    public static final char MIN_LOW_SURROGATE = '\uDC00';
451
452    /**
453     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
454     * encoding, {@code '\uDFFF'}.
455     *
456     * @since 1.5
457     */
458    public static final char MAX_LOW_SURROGATE = '\uDFFF';
459
460    /**
461     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
        * This is the same value as {@link #MIN_HIGH_SURROGATE}.
462     *
463     * @since 1.5
464     */
465    public static final char MIN_SURROGATE = '\uD800';
466
467    /**
468     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
        * This is the same value as {@link #MAX_LOW_SURROGATE}.
469     *
470     * @since 1.5
471     */
472    public static final char MAX_SURROGATE = '\uDFFF';
473
474    /**
475     * The minimum value of a supplementary code point, {@code U+010000}.
        * This is the first code point above {@link #MAX_VALUE}.
476     *
477     * @since 1.5
478     */
479    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
480
481    /**
482     * The minimum code point value, {@code U+0000}.
483     *
484     * @since 1.5
485     */
486    public static final int MIN_CODE_POINT = 0x000000;
487
488    /**
489     * The maximum code point value, {@code U+10FFFF}.
490     *
491     * @since 1.5
492     */
493    public static final int MAX_CODE_POINT = 0x10FFFF;
494
495    /**
496     * The number of bits required to represent a {@code Character} value
497     * in unsigned form.
498     *
499     * @since 1.5
500     */
501    public static final int SIZE = 16;
502
    // Translation table whose 19 entries are the DIRECTIONALITY_* constants with values 0
    // through 18, each appearing exactly once, in an order that differs from their numeric
    // order. DIRECTIONALITY_UNDEFINED (-1) is not part of the table.
    // NOTE(review): presumably indexed by the directionality code reported by the underlying
    // (native/ICU) implementation; the code that reads this table is outside this view, so
    // confirm the index meaning there before reordering or extending the entries.
503    private static final byte[] DIRECTIONALITY = new byte[] {
504            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
505            DIRECTIONALITY_EUROPEAN_NUMBER,
506            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
507            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
508            DIRECTIONALITY_ARABIC_NUMBER,
509            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
510            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
511            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
512            DIRECTIONALITY_OTHER_NEUTRALS,
513            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
514            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
515            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
516            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
517            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
518            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
519            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
520
521    /**
522     * Represents a subset of the Unicode character set. A subset carries only the
        * name given at construction, and an instance is equal to nothing but itself.
523     */
524    public static class Subset {
525        private final String name;
526
527        /**
528         * Constructs a new {@code Subset}.
            *
            * @param name the name of the subset, later returned by {@link #toString()}
            * @throws NullPointerException if {@code name} is {@code null}
529         */
530        protected Subset(String name) {
531            if (name == null) {
532                throw new NullPointerException("name == null");
533            }
534            this.name = name;
535        }
536
537        /**
538         * Compares this character subset for identity with the specified object.
            *
            * @param object the object to compare with this subset
            * @return {@code true} only if {@code object} is this very instance
539         */
540        @Override public final boolean equals(Object object) {
541            return object == this;
542        }
543
544        /**
545         * Returns this subset's hash code, which is the hash code computed by
546         * {@link java.lang.Object#hashCode()} and so matches the identity-based {@link #equals}.
547         */
548        @Override public final int hashCode() {
549            return super.hashCode();
550        }
551
552        /**
553         * Returns this subset's name, exactly as it was passed to the constructor.
554         */
555        @Override public final String toString() {
556            return name;
557        }
558    }
559
560    /**
561     * Represents a block of Unicode characters. This class provides constants for various
562     * well-known blocks (but not all blocks) and methods for looking up a block
563     * by name ({@link #forName}) or by code point ({@link #of}).
564     *
565     * @since 1.2
566     */
567    public static final class UnicodeBlock extends Subset {
568        /**
569         * The Surrogates Area Unicode block.
570         *
571         * @deprecated As of Java 5, this block has been replaced by
572         *             {@link #HIGH_SURROGATES},
573         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
574         *             {@link #LOW_SURROGATES}.
575         */
576        @Deprecated
577        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA");
578
579        /** The Basic Latin Unicode block. */
580        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN");
581
582        /** The Latin-1 Supplement Unicode block. */
583        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT");
584
585        /** The Latin Extended-A Unicode block. */
586        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A");
587
588        /** The Latin Extended-B Unicode block. */
589        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B");
590
591        /** The IPA Extensions Unicode block. */
592        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS");
593
594        /** The Spacing Modifier Letters Unicode block. */
595        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS");
596
597        /** The Combining Diacritical Marks Unicode block. */
598        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS");
599
600        /**
601         * The Greek and Coptic Unicode block. Previously referred to as Greek.
602         */
603        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK");
604
605        /** The Cyrillic Unicode block. */
606        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC");
607
608        /**
609         * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary.
610         */
611        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY");
612
613        /** The Armenian Unicode block. */
614        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN");
615
616        /** The Hebrew Unicode block. */
617        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW");
618
619        /** The Arabic Unicode block. */
620        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC");
621
622        /** The Syriac Unicode block. */
623        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC");
624
625        /** The Thaana Unicode block. */
626        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA");
627
628        /** The Devanagari Unicode block. */
629        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI");
630
631        /** The Bengali Unicode block. */
632        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI");
633
634        /** The Gurmukhi Unicode block. */
635        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI");
636
637        /** The Gujarati Unicode block. */
638        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI");
639
640        /** The Oriya Unicode block. */
641        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA");
642
643        /** The Tamil Unicode block. */
644        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL");
645
646        /** The Telugu Unicode block. */
647        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU");
648
649        /** The Kannada Unicode block. */
650        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA");
651
652        /** The Malayalam Unicode block. */
653        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM");
654
655        /** The Sinhala Unicode block. */
656        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA");
657
658        /** The Thai Unicode block. */
659        public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
660
661        /** The Lao Unicode block. */
662        public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
663
664        /** The Tibetan Unicode block. */
665        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN");
666
667        /** The Myanmar Unicode block. */
668        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR");
669
670        /** The Georgian Unicode block. */
671        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN");
672
673        /** The Hangul Jamo Unicode block. */
674        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO");
675
676        /** The Ethiopic Unicode block. */
677        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC");
678
679        /** The Cherokee Unicode block. */
680        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE");
681
682        /** The Unified Canadian Aboriginal Syllabics Unicode block. */
683        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
684
685        /** The Ogham Unicode block. */
686        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM");
687
688        /** The Runic Unicode block. */
689        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC");
690
691        /** The Tagalog Unicode block. */
692        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG");
693
694        /** The Hanunoo Unicode block. */
695        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO");
696
697        /** The Buhid Unicode block. */
698        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID");
699
700        /** The Tagbanwa Unicode block. */
701        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA");
702
703        /** The Khmer Unicode block. */
704        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER");
705
706        /** The Mongolian Unicode block. */
707        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN");
708
709        /** The Limbu Unicode block. */
710        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU");
711
712        /** The Tai Le Unicode block. */
713        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE");
714
715        /** The Khmer Symbols Unicode block. */
716        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS");
717
718        /** The Phonetic Extensions Unicode block. */
719        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS");
720
721        /** The Latin Extended Additional Unicode block. */
722        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL");
723
724        /** The Greek Extended Unicode block. */
725        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED");
726
727        /** The General Punctuation Unicode block. */
728        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION");
729
730        /** The Superscripts and Subscripts Unicode block. */
731        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS");
732
733        /** The Currency Symbols Unicode block. */
734        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS");
735
736        /**
737         * The Combining Diacritical Marks for Symbols Unicode
738         * block. Previously referred to as Combining Marks for
739         * Symbols.
740         */
741        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS");
742
743        /** The Letterlike Symbols Unicode block. */
744        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS");
745
746        /** The Number Forms Unicode block. */
747        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS");
748
749        /** The Arrows Unicode block. */
750        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS");
751
752        /** The Mathematical Operators Unicode block. */
753        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS");
754
755        /** The Miscellaneous Technical Unicode block. */
756        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL");
757
758        /** The Control Pictures Unicode block. */
759        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES");
760
761        /** The Optical Character Recognition Unicode block. */
762        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION");
763
764        /** The Enclosed Alphanumerics Unicode block. */
765        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS");
766
767        /** The Box Drawing Unicode block. */
768        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING");
769
770        /** The Block Elements Unicode block. */
771        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS");
772
773        /** The Geometric Shapes Unicode block. */
774        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES");
775
776        /** The Miscellaneous Symbols Unicode block. */
777        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS");
778
779        /** The Dingbats Unicode block. */
780        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS");
781
782        /** The Miscellaneous Mathematical Symbols-A Unicode block. */
783        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A");
784
785        /** The Supplemental Arrows-A Unicode block. */
786        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A");
787
788        /** The Braille Patterns Unicode block. */
789        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS");
790
791        /** The Supplemental Arrows-B Unicode block. */
792        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B");
793
794        /** The Miscellaneous Mathematical Symbols-B Unicode block. */
795        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B");
796
797        /** The Supplemental Mathematical Operators Unicode block. */
798        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS");
799
800        /** The Miscellaneous Symbols and Arrows Unicode block. */
801        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS");
802
803        /** The CJK Radicals Supplement Unicode block. */
804        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT");
805
806        /** The Kangxi Radicals Unicode block. */
807        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS");
808
809        /** The Ideographic Description Characters Unicode block. */
810        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
811
812        /** The CJK Symbols and Punctuation Unicode block. */
813        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION");
814
815        /** The Hiragana Unicode block. */
816        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA");
817
818        /** The Katakana Unicode block. */
819        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA");
820
821        /** The Bopomofo Unicode block. */
822        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO");
823
824        /** The Hangul Compatibility Jamo Unicode block. */
825        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO");
826
827        /** The Kanbun Unicode block. */
828        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN");
829
830        /** The Bopomofo Extended Unicode block. */
831        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED");
832
833        /** The Katakana Phonetic Extensions Unicode block. */
834        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS");
835
836        /** The Enclosed CJK Letters and Months Unicode block. */
837        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS");
838
839        /** The CJK Compatibility Unicode block. */
840        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY");
841
842        /** The CJK Unified Ideographs Extension A Unicode block. */
843        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
844
845        /** The Yijing Hexagram Symbols Unicode block. */
846        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS");
847
848        /** The CJK Unified Ideographs Unicode block. */
849        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS");
850
851        /** The Yi Syllables Unicode block. */
852        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES");
853
854        /** The Yi Radicals Unicode block. */
855        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS");
856
857        /** The Hangul Syllables Unicode block. */
858        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES");
859
860        /**
861         * The High Surrogates Unicode block. This block represents
862         * code point values in the high surrogate range 0xD800 to 0xDB7F.
863         */
864        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES");
865
866        /**
867         * The High Private Use Surrogates Unicode block. This block
868         * represents code point values in the high surrogate range 0xDB80 to
869         * 0xDBFF.
870         */
871        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES");
872
873        /**
874         * The Low Surrogates Unicode block. This block represents
875         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
876         */
877        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES");
878
879        /** The Private Use Area Unicode block. */
880        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA");
881
882        /** The CJK Compatibility Ideographs Unicode block. */
883        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS");
884
885        /** The Alphabetic Presentation Forms Unicode block. */
886        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS");
887
888        /** The Arabic Presentation Forms-A Unicode block. */
889        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A");
890
891        /** The Variation Selectors Unicode block. */
892        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS");
893
894        /** The Combining Half Marks Unicode block. */
895        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS");
896
897        /** The CJK Compatibility Forms Unicode block. */
898        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS");
899
900        /** The Small Form Variants Unicode block. */
901        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS");
902
903        /** The Arabic Presentation Forms-B Unicode block. */
904        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B");
905
906        /** The Halfwidth and Fullwidth Forms Unicode block. */
907        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS");
908
909        /** The Specials Unicode block. */
910        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS");
911
912        /** The Linear B Syllabary Unicode block. */
913        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY");
914
915        /** The Linear B Ideograms Unicode block. */
916        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS");
917
918        /** The Aegean Numbers Unicode block. */
919        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS");
920
921        /** The Old Italic Unicode block. */
922        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC");
923
924        /** The Gothic Unicode block. */
925        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
926
927        /** The Ugaritic Unicode block. */
928        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
929
930        /** The Deseret Unicode block. */
931        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
932
933        /** The Shavian Unicode block. */
934        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
935
936        /** The Osmanya Unicode block. */
937        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
938
939        /** The Cypriot Syllabary Unicode block. */
940        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY");
941
942        /** The Byzantine Musical Symbols Unicode block. */
943        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS");
944
945        /** The Musical Symbols Unicode block. */
946        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS");
947
948        /** The Tai Xuan Jing Symbols Unicode block. */
949        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS");
950
951        /** The Mathematical Alphanumeric Symbols Unicode block. */
952        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS");
953
954        /** The CJK Unified Ideographs Extension B Unicode block. */
955        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");
956
957        /** The CJK Compatibility Ideographs Supplement Unicode block. */
958        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT");
959
960        /** The Tags Unicode block. */
961        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
962
963        /** The Variation Selectors Supplement Unicode block. */
964        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT");
965
966        /** The Supplementary Private Use Area-A Unicode block. */
967        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A");
968
969        /** The Supplementary Private Use Area-B Unicode block. */
970        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B");
971
972        // Unicode 4.1.
973
974        /** The Ancient Greek Musical Notation Unicode 4.1 block. */
975        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION");
976
977        /** The Ancient Greek Numbers Unicode 4.1 block. */
978        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS");
979
980        /** The Arabic Supplement Unicode 4.1 block. */
981        public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT");
982
983        /** The Buginese Unicode 4.1 block. */
984        public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE");
985
986        /** The CJK Strokes Unicode 4.1 block. */
987        public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES");
988
989        /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */
990        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT");
991
992        /** The Coptic Unicode 4.1 block. */
993        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
994
995        /** The Ethiopic Extended Unicode 4.1 block. */
996        public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED");
997
998        /** The Ethiopic Supplement Unicode 4.1 block. */
999        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT");
1000
1001        /** The Georgian Supplement Unicode 4.1 block. */
1002        public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT");
1003
1004        /** The Glagolitic Unicode 4.1 block. */
1005        public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC");
1006
1007        /** The Kharoshthi Unicode 4.1 block. */
1008        public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI");
1009
1010        /** The Modifier Tone Letters Unicode 4.1 block. */
1011        public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS");
1012
1013        /** The New Tai Lue Unicode 4.1 block. */
1014        public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE");
1015
1016        /** The Old Persian Unicode 4.1 block. */
1017        public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN");
1018
1019        /** The Phonetic Extensions Supplement Unicode 4.1 block. */
1020        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT");
1021
1022        /** The Supplemental Punctuation Unicode 4.1 block. */
1023        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION");
1024
1025        /** The Syloti Nagri Unicode 4.1 block. */
1026        public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI");
1027
1028        /** The Tifinagh Unicode 4.1 block. */
1029        public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH");
1030
1031        /** The Vertical Forms Unicode 4.1 block. */
1032        public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS");
1033
1034        // Unicode 5.0.
1035
1036        /** The NKo Unicode 5.0 block. */
1037        public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
1038
1039        /** The Balinese Unicode 5.0 block. */
1040        public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE");
1041
1042        /** The Latin Extended-C Unicode 5.0 block. */
1043        public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C");
1044
1045        /** The Latin Extended-D Unicode 5.0 block. */
1046        public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D");
1047
1048        /** The Phags-pa Unicode 5.0 block. */
1049        public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA");
1050
1051        /** The Phoenician Unicode 5.0 block. */
1052        public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN");
1053
1054        /** The Cuneiform Unicode 5.0 block. */
1055        public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM");
1056
1057        /** The Cuneiform Numbers and Punctuation Unicode 5.0 block. */
1058        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION");
1059
1060        /** The Counting Rod Numerals Unicode 5.0 block. */
1061        public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS");
1062
1063        // Unicode 5.1.
1064
1065        /** The Sundanese Unicode 5.1 block. */
1066        public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE");
1067
1068        /** The Lepcha Unicode 5.1 block. */
1069        public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
1070
1071        /** The Ol Chiki Unicode 5.1 block. */
1072        public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI");
1073
1074        /** The Cyrillic Extended-A Unicode 5.1 block. */
1075        public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A");
1076
1077        /** The Vai Unicode 5.1 block. */
1078        public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
1079
1080        /** The Cyrillic Extended-B Unicode 5.1 block. */
1081        public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B");
1082
1083        /** The Saurashtra Unicode 5.1 block. */
1084        public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA");
1085
1086        /** The Kayah Li Unicode 5.1 block. */
1087        public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI");
1088
1089        /** The Rejang Unicode 5.1 block. */
1090        public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
1091
1092        /** The Cham Unicode 5.1 block. */
1093        public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
1094
1095        /** The Ancient Symbols Unicode 5.1 block. */
1096        public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS");
1097
1098        /** The Phaistos Disc Unicode 5.1 block. */
1099        public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC");
1100
1101        /** The Lycian Unicode 5.1 block. */
1102        public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
1103
1104        /** The Carian Unicode 5.1 block. */
1105        public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
1106
1107        /** The Lydian Unicode 5.1 block. */
1108        public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
1109
1110        /** The Mahjong Tiles Unicode 5.1 block. */
1111        public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES");
1112
1113        /** The Domino Tiles Unicode 5.1 block. */
1114        public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES");
1115
1116        // Unicode 5.2.
1117
1118        /** The Samaritan Unicode 5.2 block. */
1119        public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN");
1120
1121        /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */
1122        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED");
1123
1124        /** The Tai Tham Unicode 5.2 block. */
1125        public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM");
1126
1127        /** The Vedic Extensions Unicode 5.2 block. */
1128        public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS");
1129
1130        /** The Lisu Unicode 5.2 block. */
1131        public static final UnicodeBlock LISU = new UnicodeBlock("LISU");
1132
1133        /** The Bamum Unicode 5.2 block. */
1134        public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM");
1135
1136        /** The Common Indic Number Forms Unicode 5.2 block. */
1137        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS");
1138
1139        /** The Devanagari Extended Unicode 5.2 block. */
1140        public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED");
1141
1142        /** The Hangul Jamo Extended-A Unicode 5.2 block. */
1143        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A");
1144
1145        /** The Javanese Unicode 5.2 block. */
1146        public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE");
1147
1148        /** The Myanmar Extended-A Unicode 5.2 block. */
1149        public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A");
1150
1151        /** The Tai Viet Unicode 5.2 block. */
1152        public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET");
1153
1154        /** The Meetei Mayek Unicode 5.2 block. */
1155        public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK");
1156
1157        /** The Hangul Jamo Extended-B Unicode 5.2 block. */
1158        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B");
1159
1160        /** The Imperial Aramaic Unicode 5.2 block. */
1161        public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC");
1162
1163        /** The Old South Arabian Unicode 5.2 block. */
1164        public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN");
1165
1166        /** The Avestan Unicode 5.2 block. */
1167        public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN");
1168
1169        /** The Inscriptional Parthian Unicode 5.2 block. */
1170        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN");
1171
1172        /** The Inscriptional Pahlavi Unicode 5.2 block. */
1173        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI");
1174
1175        /** The Old Turkic Unicode 5.2 block. */
1176        public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC");
1177
1178        /** The Rumi Numeral Symbols Unicode 5.2 block. */
1179        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS");
1180
1181        /** The Kaithi Unicode 5.2 block. */
1182        public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI");
1183
1184        /** The Egyptian Hieroglyphs Unicode 5.2 block. */
1185        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS");
1186
1187        /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */
1188        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT");
1189
1190        /** The Enclosed Ideographic Supplement Unicode 5.2 block. */
1191        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT");
1192
1193        /** The CJK Unified Ideographs Extension C Unicode 5.2 block. */
1194        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C");
1195
1196        // Unicode 6.0.
1197
1198        /** The Mandaic Unicode 6.0 block. */
1199        public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC");
1200
1201        /** The Batak Unicode 6.0 block. */
1202        public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK");
1203
1204        /** The Ethiopic Extended-A Unicode 6.0 block. */
1205        public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A");
1206
1207        /** The Brahmi Unicode 6.0 block. */
1208        public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI");
1209
1210        /** The Bamum Supplement Unicode 6.0 block. */
1211        public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT");
1212
1213        /** The Kana Supplement Unicode 6.0 block. */
1214        public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT");
1215
1216        /** The Playing Cards Unicode 6.0 block. */
1217        public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS");
1218
1219        /** The Miscellaneous Symbols And Pictographs Unicode 6.0 block. */
1220        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS");
1221
1222        /** The Emoticons Unicode 6.0 block. */
1223        public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS");
1224
1225        /** The Transport And Map Symbols Unicode 6.0 block. */
1226        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS");
1227
1228        /** The Alchemical Symbols Unicode 6.0 block. */
1229        public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS");
1230
1231        /** The CJK Unified Ideographs Extension D Unicode 6.0 block. */
1232        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D");
1233
1234        /*
1235         * All of the UnicodeBlocks above, in the icu4c UBlock enum order.
1236         */
1237        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1238            null, // icu4c numbers blocks starting at 1, so index 0 should be null.
1239
1240            UnicodeBlock.BASIC_LATIN,
1241            UnicodeBlock.LATIN_1_SUPPLEMENT,
1242            UnicodeBlock.LATIN_EXTENDED_A,
1243            UnicodeBlock.LATIN_EXTENDED_B,
1244            UnicodeBlock.IPA_EXTENSIONS,
1245            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1246            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1247            UnicodeBlock.GREEK,
1248            UnicodeBlock.CYRILLIC,
1249            UnicodeBlock.ARMENIAN,
1250            UnicodeBlock.HEBREW,
1251            UnicodeBlock.ARABIC,
1252            UnicodeBlock.SYRIAC,
1253            UnicodeBlock.THAANA,
1254            UnicodeBlock.DEVANAGARI,
1255            UnicodeBlock.BENGALI,
1256            UnicodeBlock.GURMUKHI,
1257            UnicodeBlock.GUJARATI,
1258            UnicodeBlock.ORIYA,
1259            UnicodeBlock.TAMIL,
1260            UnicodeBlock.TELUGU,
1261            UnicodeBlock.KANNADA,
1262            UnicodeBlock.MALAYALAM,
1263            UnicodeBlock.SINHALA,
1264            UnicodeBlock.THAI,
1265            UnicodeBlock.LAO,
1266            UnicodeBlock.TIBETAN,
1267            UnicodeBlock.MYANMAR,
1268            UnicodeBlock.GEORGIAN,
1269            UnicodeBlock.HANGUL_JAMO,
1270            UnicodeBlock.ETHIOPIC,
1271            UnicodeBlock.CHEROKEE,
1272            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1273            UnicodeBlock.OGHAM,
1274            UnicodeBlock.RUNIC,
1275            UnicodeBlock.KHMER,
1276            UnicodeBlock.MONGOLIAN,
1277            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1278            UnicodeBlock.GREEK_EXTENDED,
1279            UnicodeBlock.GENERAL_PUNCTUATION,
1280            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1281            UnicodeBlock.CURRENCY_SYMBOLS,
1282            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1283            UnicodeBlock.LETTERLIKE_SYMBOLS,
1284            UnicodeBlock.NUMBER_FORMS,
1285            UnicodeBlock.ARROWS,
1286            UnicodeBlock.MATHEMATICAL_OPERATORS,
1287            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1288            UnicodeBlock.CONTROL_PICTURES,
1289            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1290            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1291            UnicodeBlock.BOX_DRAWING,
1292            UnicodeBlock.BLOCK_ELEMENTS,
1293            UnicodeBlock.GEOMETRIC_SHAPES,
1294            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1295            UnicodeBlock.DINGBATS,
1296            UnicodeBlock.BRAILLE_PATTERNS,
1297            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1298            UnicodeBlock.KANGXI_RADICALS,
1299            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1300            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1301            UnicodeBlock.HIRAGANA,
1302            UnicodeBlock.KATAKANA,
1303            UnicodeBlock.BOPOMOFO,
1304            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1305            UnicodeBlock.KANBUN,
1306            UnicodeBlock.BOPOMOFO_EXTENDED,
1307            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1308            UnicodeBlock.CJK_COMPATIBILITY,
1309            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1310            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1311            UnicodeBlock.YI_SYLLABLES,
1312            UnicodeBlock.YI_RADICALS,
1313            UnicodeBlock.HANGUL_SYLLABLES,
1314            UnicodeBlock.HIGH_SURROGATES,
1315            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1316            UnicodeBlock.LOW_SURROGATES,
1317            UnicodeBlock.PRIVATE_USE_AREA,
1318            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1319            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1320            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1321            UnicodeBlock.COMBINING_HALF_MARKS,
1322            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1323            UnicodeBlock.SMALL_FORM_VARIANTS,
1324            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1325            UnicodeBlock.SPECIALS,
1326            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1327
1328            // Unicode 3.1.
1329            UnicodeBlock.OLD_ITALIC,
1330            UnicodeBlock.GOTHIC,
1331            UnicodeBlock.DESERET,
1332            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1333            UnicodeBlock.MUSICAL_SYMBOLS,
1334            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1335            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1336            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1337            UnicodeBlock.TAGS,
1338
1339            // Unicode 3.2.
1340            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1341            UnicodeBlock.TAGALOG,
1342            UnicodeBlock.HANUNOO,
1343            UnicodeBlock.BUHID,
1344            UnicodeBlock.TAGBANWA,
1345            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1346            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1347            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1348            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1349            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1350            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1351            UnicodeBlock.VARIATION_SELECTORS,
1352            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1353            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1354
1355            // Unicode 4.0.
1356            UnicodeBlock.LIMBU,
1357            UnicodeBlock.TAI_LE,
1358            UnicodeBlock.KHMER_SYMBOLS,
1359            UnicodeBlock.PHONETIC_EXTENSIONS,
1360            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1361            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1362            UnicodeBlock.LINEAR_B_SYLLABARY,
1363            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1364            UnicodeBlock.AEGEAN_NUMBERS,
1365            UnicodeBlock.UGARITIC,
1366            UnicodeBlock.SHAVIAN,
1367            UnicodeBlock.OSMANYA,
1368            UnicodeBlock.CYPRIOT_SYLLABARY,
1369            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1370            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT,
1371
1372            // Unicode 4.1.
1373            UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION,
1374            UnicodeBlock.ANCIENT_GREEK_NUMBERS,
1375            UnicodeBlock.ARABIC_SUPPLEMENT,
1376            UnicodeBlock.BUGINESE,
1377            UnicodeBlock.CJK_STROKES,
1378            UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
1379            UnicodeBlock.COPTIC,
1380            UnicodeBlock.ETHIOPIC_EXTENDED,
1381            UnicodeBlock.ETHIOPIC_SUPPLEMENT,
1382            UnicodeBlock.GEORGIAN_SUPPLEMENT,
1383            UnicodeBlock.GLAGOLITIC,
1384            UnicodeBlock.KHAROSHTHI,
1385            UnicodeBlock.MODIFIER_TONE_LETTERS,
1386            UnicodeBlock.NEW_TAI_LUE,
1387            UnicodeBlock.OLD_PERSIAN,
1388            UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT,
1389            UnicodeBlock.SUPPLEMENTAL_PUNCTUATION,
1390            UnicodeBlock.SYLOTI_NAGRI,
1391            UnicodeBlock.TIFINAGH,
1392            UnicodeBlock.VERTICAL_FORMS,
1393
1394            // Unicode 5.0.
1395            UnicodeBlock.NKO,
1396            UnicodeBlock.BALINESE,
1397            UnicodeBlock.LATIN_EXTENDED_C,
1398            UnicodeBlock.LATIN_EXTENDED_D,
1399            UnicodeBlock.PHAGS_PA,
1400            UnicodeBlock.PHOENICIAN,
1401            UnicodeBlock.CUNEIFORM,
1402            UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION,
1403            UnicodeBlock.COUNTING_ROD_NUMERALS,
1404
1405            // Unicode 5.1.
1406            UnicodeBlock.SUNDANESE,
1407            UnicodeBlock.LEPCHA,
1408            UnicodeBlock.OL_CHIKI,
1409            UnicodeBlock.CYRILLIC_EXTENDED_A,
1410            UnicodeBlock.VAI,
1411            UnicodeBlock.CYRILLIC_EXTENDED_B,
1412            UnicodeBlock.SAURASHTRA,
1413            UnicodeBlock.KAYAH_LI,
1414            UnicodeBlock.REJANG,
1415            UnicodeBlock.CHAM,
1416            UnicodeBlock.ANCIENT_SYMBOLS,
1417            UnicodeBlock.PHAISTOS_DISC,
1418            UnicodeBlock.LYCIAN,
1419            UnicodeBlock.CARIAN,
1420            UnicodeBlock.LYDIAN,
1421            UnicodeBlock.MAHJONG_TILES,
1422            UnicodeBlock.DOMINO_TILES,
1423
1424            // Unicode 5.2.
1425            UnicodeBlock.SAMARITAN,
1426            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
1427            UnicodeBlock.TAI_THAM,
1428            UnicodeBlock.VEDIC_EXTENSIONS,
1429            UnicodeBlock.LISU,
1430            UnicodeBlock.BAMUM,
1431            UnicodeBlock.COMMON_INDIC_NUMBER_FORMS,
1432            UnicodeBlock.DEVANAGARI_EXTENDED,
1433            UnicodeBlock.HANGUL_JAMO_EXTENDED_A,
1434            UnicodeBlock.JAVANESE,
1435            UnicodeBlock.MYANMAR_EXTENDED_A,
1436            UnicodeBlock.TAI_VIET,
1437            UnicodeBlock.MEETEI_MAYEK,
1438            UnicodeBlock.HANGUL_JAMO_EXTENDED_B,
1439            UnicodeBlock.IMPERIAL_ARAMAIC,
1440            UnicodeBlock.OLD_SOUTH_ARABIAN,
1441            UnicodeBlock.AVESTAN,
1442            UnicodeBlock.INSCRIPTIONAL_PARTHIAN,
1443            UnicodeBlock.INSCRIPTIONAL_PAHLAVI,
1444            UnicodeBlock.OLD_TURKIC,
1445            UnicodeBlock.RUMI_NUMERAL_SYMBOLS,
1446            UnicodeBlock.KAITHI,
1447            UnicodeBlock.EGYPTIAN_HIEROGLYPHS,
1448            UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
1449            UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
1450            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
1451
1452            // Unicode 6.0.
1453            UnicodeBlock.MANDAIC,
1454            UnicodeBlock.BATAK,
1455            UnicodeBlock.ETHIOPIC_EXTENDED_A,
1456            UnicodeBlock.BRAHMI,
1457            UnicodeBlock.BAMUM_SUPPLEMENT,
1458            UnicodeBlock.KANA_SUPPLEMENT,
1459            UnicodeBlock.PLAYING_CARDS,
1460            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
1461            UnicodeBlock.EMOTICONS,
1462            UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS,
1463            UnicodeBlock.ALCHEMICAL_SYMBOLS,
1464            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
1465        };
1466
1467        /**
1468         * Returns the Unicode block for the given block name, or null if there is no
1469         * such block.
1470         *
1471         * <p>Block names may be one of the following:
1472         * <ul>
1473         * <li>Canonical block name, as defined by the Unicode specification;
1474         * case-insensitive.</li>
1475         * <li>Canonical block name without any spaces, as defined by the
1476         * Unicode specification; case-insensitive.</li>
1477         * <li>A {@code UnicodeBlock} constant identifier. This is determined by
1478         * converting the canonical name to uppercase and replacing all spaces and hyphens
1479         * with underscores.</li>
1480         * </ul>
1481         *
1482         * @throws NullPointerException
1483         *             if {@code blockName == null}.
1484         * @throws IllegalArgumentException
1485         *             if {@code blockName} is not the name of any known block.
1486         * @since 1.5
1487         */
1488        public static UnicodeBlock forName(String blockName) {
1489            if (blockName == null) {
1490                throw new NullPointerException("blockName == null");
1491            }
1492            int block = unicodeBlockForName(blockName);
1493            if (block == -1) {
1494                throw new IllegalArgumentException("Unknown block: " + blockName);
1495            }
1496            return BLOCKS[block];
1497        }
1498
1499        /**
1500         * Returns the Unicode block containing the given code point, or null if the
1501         * code point does not belong to any known block.
1502         */
1503        public static UnicodeBlock of(char c) {
1504            return of((int) c);
1505        }
1506
1507        /**
1508         * Returns the Unicode block containing the given code point, or null if the
1509         * code point does not belong to any known block.
1510         */
1511        public static UnicodeBlock of(int codePoint) {
1512            checkValidCodePoint(codePoint);
1513            int block = unicodeBlockForCodePoint(codePoint);
1514            if (block == -1 || block >= BLOCKS.length) {
1515                return null;
1516            }
1517            return BLOCKS[block];
1518        }
1519
        /**
         * Creates a block with the given name. The name is handed unchanged to the
         * superclass constructor; being private, this constructor can only be used
         * from within the enclosing source file.
         */
        private UnicodeBlock(String blockName) {
            super(blockName);
        }
1523    }
1524
    /**
     * Native lookup of a Unicode block by name. {@code UnicodeBlock.forName}
     * treats a result of {@code -1} as "no such block" and uses any other result
     * as an index into {@code UnicodeBlock.BLOCKS}.
     */
    private static native int unicodeBlockForName(String blockName);
1526
    /**
     * Native lookup of the Unicode block containing a code point.
     * {@code UnicodeBlock.of(int)} validates the code point before calling this,
     * maps a result of {@code -1} (or an index past the end of
     * {@code UnicodeBlock.BLOCKS}) to {@code null}, and otherwise uses the result
     * as an index into that table.
     */
    private static native int unicodeBlockForCodePoint(int codePoint);
1528
1529    /**
1530     * Represents a <a href="http://www.unicode.org/reports/tr24/">Unicode script</a>.
1531     * Every Unicode code point is contained by a single {@code UnicodeScript}. Code points
1532     * shared between scripts will be in {@code COMMON}. Code points for combining
1533     * characters that can be applied to multiple scripts will be in {@code INHERITED}
1534     * because they inherit the script of their base character. Code points whose scripts
1535     * don't have a corresponding {@code UnicodeScript} will be in {@code UNKNOWN}.
1536     *
1537     * @since 1.7
1538     * @hide
1539     */
1540    public static enum UnicodeScript {
1541        /** ISO 15924 English name "Arabic" */
1542        ARABIC,
1543        /** ISO 15924 English name "Armenian" */
1544        ARMENIAN,
1545        /** ISO 15924 English name "Avestan" */
1546        AVESTAN,
1547        /** ISO 15924 English name "Balinese" */
1548        BALINESE,
1549        /** ISO 15924 English name "Bamum" */
1550        BAMUM,
1551        /** ISO 15924 English name "Batak" */
1552        BATAK,
1553        /** ISO 15924 English name "Bengali" */
1554        BENGALI,
1555        /** ISO 15924 English name "Bopomofo" */
1556        BOPOMOFO,
1557        /** ISO 15924 English name "Brahmi" */
1558        BRAHMI,
1559        /** ISO 15924 English name "Braille" */
1560        BRAILLE,
1561        /** ISO 15924 English name "Buginese" */
1562        BUGINESE,
1563        /** ISO 15924 English name "Buhid" */
1564        BUHID,
1565        /** ISO 15924 English name "Unified Canadian Aboriginal Syllabics" */
1566        CANADIAN_ABORIGINAL,
1567        /** ISO 15924 English name "Carian" */
1568        CARIAN,
1569        /** ISO 15924 English name "Cham" */
1570        CHAM,
1571        /** ISO 15924 English name "Cherokee" */
1572        CHEROKEE,
1573        /** ISO 15924 English name "Common" */
1574        COMMON,
1575        /** ISO 15924 English name "Coptic" */
1576        COPTIC,
1577        /** ISO 15924 English name "Cuneiform" */
1578        CUNEIFORM,
1579        /** ISO 15924 English name "Cypriot" */
1580        CYPRIOT,
1581        /** ISO 15924 English name "Cyrillic" */
1582        CYRILLIC,
1583        /** ISO 15924 English name "Deseret" */
1584        DESERET,
1585        /** ISO 15924 English name "Devanagari" */
1586        DEVANAGARI,
1587        /** ISO 15924 English name "Egyptian hieroglyphs" */
1588        EGYPTIAN_HIEROGLYPHS,
1589        /** ISO 15924 English name "Ethiopic" */
1590        ETHIOPIC,
1591        /** ISO 15924 English name "Georgian" */
1592        GEORGIAN,
1593        /** ISO 15924 English name "Glagolitic" */
1594        GLAGOLITIC,
1595        /** ISO 15924 English name "Gothic" */
1596        GOTHIC,
1597        /** ISO 15924 English name "Greek" */
1598        GREEK,
1599        /** ISO 15924 English name "Gujarati" */
1600        GUJARATI,
1601        /** ISO 15924 English name "Gurmukhi" */
1602        GURMUKHI,
1603        /** ISO 15924 English name "Han" */
1604        HAN,
1605        /** ISO 15924 English name "Hangul" */
1606        HANGUL,
1607        /** ISO 15924 English name "Hanunoo" */
1608        HANUNOO,
1609        /** ISO 15924 English name "Hebrew" */
1610        HEBREW,
1611        /** ISO 15924 English name "Hiragana" */
1612        HIRAGANA,
1613        /** ISO 15924 English name "Imperial aramaic" */
1614        IMPERIAL_ARAMAIC,
1615        /** ISO 15924 English name "Inherited" */
1616        INHERITED,
1617        /** ISO 15924 English name "Inscriptional pahlavi" */
1618        INSCRIPTIONAL_PAHLAVI,
1619        /** ISO 15924 English name "Inscriptional parthian" */
1620        INSCRIPTIONAL_PARTHIAN,
1621        /** ISO 15924 English name "Javanese" */
1622        JAVANESE,
1623        /** ISO 15924 English name "Kaithi" */
1624        KAITHI,
1625        /** ISO 15924 English name "Kannada" */
1626        KANNADA,
1627        /** ISO 15924 English name "Katakana" */
1628        KATAKANA,
1629        /** ISO 15924 English name "Kayah li" */
1630        KAYAH_LI,
1631        /** ISO 15924 English name "Kharoshthi" */
1632        KHAROSHTHI,
1633        /** ISO 15924 English name "Khmer" */
1634        KHMER,
1635        /** ISO 15924 English name "Lao" */
1636        LAO,
1637        /** ISO 15924 English name "Latin" */
1638        LATIN,
1639        /** ISO 15924 English name "Lepcha" */
1640        LEPCHA,
1641        /** ISO 15924 English name "Limbu" */
1642        LIMBU,
1643        /** ISO 15924 English name "Linear B" */
1644        LINEAR_B,
1645        /** ISO 15924 English name "Lisu" */
1646        LISU,
1647        /** ISO 15924 English name "Lycian" */
1648        LYCIAN,
1649        /** ISO 15924 English name "Lydian" */
1650        LYDIAN,
1651        /** ISO 15924 English name "Malayalam" */
1652        MALAYALAM,
1653        /** ISO 15924 English name "Mandaic" */
1654        MANDAIC,
1655        /** ISO 15924 English name "Meetei Mayek (Meithei, Meetei)" */
1656        MEETEI_MAYEK,
1657        /** ISO 15924 English name "Mongolian" */
1658        MONGOLIAN,
1659        /** ISO 15924 English name "Myanmar" */
1660        MYANMAR,
1661        /** ISO 15924 English name "New Tai Lue" */
1662        NEW_TAI_LUE,
1663        /** ISO 15924 English name "Nko" */
1664        NKO,
1665        /** ISO 15924 English name "Ogham" */
1666        OGHAM,
1667        /** ISO 15924 English name "Ol Chiki" */
1668        OL_CHIKI,
1669        /** ISO 15924 English name "Old Italic" */
1670        OLD_ITALIC,
1671        /** ISO 15924 English name "Old Persian" */
1672        OLD_PERSIAN,
1673        /** ISO 15924 English name "Old South Arabian" */
1674        OLD_SOUTH_ARABIAN,
1675        /** ISO 15924 English name "Old Turkic, Orkhon Runic" */
1676        OLD_TURKIC,
1677        /** ISO 15924 English name "Oriya" */
1678        ORIYA,
1679        /** ISO 15924 English name "Osmanya" */
1680        OSMANYA,
1681        /** ISO 15924 English name "Phags-pa" */
1682        PHAGS_PA,
1683        /** ISO 15924 English name "Phoenician" */
1684        PHOENICIAN,
1685        /** ISO 15924 English name "Rejang" */
1686        REJANG,
1687        /** ISO 15924 English name "Runic" */
1688        RUNIC,
1689        /** ISO 15924 English name "Samaritan" */
1690        SAMARITAN,
1691        /** ISO 15924 English name "Saurashtra" */
1692        SAURASHTRA,
1693        /** ISO 15924 English name "Shavian" */
1694        SHAVIAN,
1695        /** ISO 15924 English name "Sinhala" */
1696        SINHALA,
1697        /** ISO 15924 English name "Sundanese" */
1698        SUNDANESE,
1699        /** ISO 15924 English name "Syloti Nagri" */
1700        SYLOTI_NAGRI,
1701        /** ISO 15924 English name "Syriac" */
1702        SYRIAC,
1703        /** ISO 15924 English name "Tagalog" */
1704        TAGALOG,
1705        /** ISO 15924 English name "Tagbanwa" */
1706        TAGBANWA,
1707        /** ISO 15924 English name "Tai Le" */
1708        TAI_LE,
1709        /** ISO 15924 English name "Tai Tham (Lanna)" */
1710        TAI_THAM,
1711        /** ISO 15924 English name "Tai Viet" */
1712        TAI_VIET,
1713        /** ISO 15924 English name "Tamil" */
1714        TAMIL,
1715        /** ISO 15924 English name "Telugu" */
1716        TELUGU,
1717        /** ISO 15924 English name "Thaana" */
1718        THAANA,
1719        /** ISO 15924 English name "Thai" */
1720        THAI,
1721        /** ISO 15924 English name "Tibetan" */
1722        TIBETAN,
1723        /** ISO 15924 English name "Tifinagh" */
1724        TIFINAGH,
1725        /** ISO 15924 English name "Ugaritic" */
1726        UGARITIC,
1727        /** ISO 15924 English name "Unknown" */
1728        UNKNOWN,
1729        /** ISO 15924 English name "Vai" */
1730        VAI,
1731        /** ISO 15924 English name "Yi" */
1732        YI;
1733
1734        private static final UnicodeScript[] SCRIPTS = {
1735                COMMON,
1736                INHERITED,
1737                ARABIC,
1738                ARMENIAN,
1739                BENGALI,
1740                BOPOMOFO,
1741                CHEROKEE,
1742                COPTIC,
1743                CYRILLIC,
1744                DESERET,
1745                DEVANAGARI,
1746                ETHIOPIC,
1747                GEORGIAN,
1748                GOTHIC,
1749                GREEK,
1750                GUJARATI,
1751                GURMUKHI,
1752                HAN,
1753                HANGUL,
1754                HEBREW,
1755                HIRAGANA,
1756                KANNADA,
1757                KATAKANA,
1758                KHMER,
1759                LAO,
1760                LATIN,
1761                MALAYALAM,
1762                MONGOLIAN,
1763                MYANMAR,
1764                OGHAM,
1765                OLD_ITALIC,
1766                ORIYA,
1767                RUNIC,
1768                SINHALA,
1769                SYRIAC,
1770                TAMIL,
1771                TELUGU,
1772                THAANA,
1773                THAI,
1774                TIBETAN,
1775                CANADIAN_ABORIGINAL,
1776                YI,
1777                TAGALOG,
1778                HANUNOO,
1779                BUHID,
1780                TAGBANWA,
1781                BRAILLE,
1782                CYPRIOT,
1783                LIMBU,
1784                LINEAR_B,
1785                OSMANYA,
1786                SHAVIAN,
1787                TAI_LE,
1788                UGARITIC,
1789                null,  // USCRIPT_KATAKANA_OR_HIRAGANA
1790                BUGINESE,
1791                GLAGOLITIC,
1792                KHAROSHTHI,
1793                SYLOTI_NAGRI,
1794                NEW_TAI_LUE,
1795                TIFINAGH,
1796                OLD_PERSIAN,
1797                BALINESE,
1798                BATAK,
1799                null, // USCRIPT_BLISSYMBOLS,
1800                BRAHMI,
1801                CHAM,
1802                null,  // USCRIPT_CIRTH,
1803                null,  // USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC,
1804                null,  // USCRIPT_DEMOTIC_EGYPTIAN,
1805                null,  // USCRIPT_HIERATIC_EGYPTIAN,
1806                EGYPTIAN_HIEROGLYPHS,
1807                null,  // USCRIPT_USCRIPT_KHUTSURI,
1808                null,  // USCRIPT_SIMPLIFIED_HAN,
1809                null,  // USCRIPT_TRADITIONAL_HAN,
1810                null,  // USCRIPT_PAHAWH_HMONG,
1811                null,  // USCRIPT_OLD_HUNGARIAN,
1812                null,  // USCRIPT_HARAPPAN_INDUS,
1813                JAVANESE,
1814                KAYAH_LI,
1815                null,  // USCRIPT_LATIN_FRAKTUR,
1816                null,  // USCRIPT_LATIN_GAELIC,
1817                LEPCHA,
1818                null,  // USCRIPT_LINEAR_A,
1819                MANDAIC, // == MANDAEAN
1820                null,  // USCRIPT_MAYAN_HIEROGLYPHS,
1821                null,  // USCRIPT_MEROITIC_HIEROGLYPHS == USCRIPT_MEROITIC
1822                null,  // USCRIPT_NKO,
1823                OLD_TURKIC,  // USCRIPT_ORKHON == OLD_TURKIC,
1824                null,  // USCRIPT_OLD_PERMIC,
1825                PHAGS_PA,
1826                PHOENICIAN,
1827                null,  // USCRIPT_PHONETIC_POLLARD === MIAO,
1828                null,  // USCRIPT_RONGORONGO,
1829                null,  // USCRIPT_SARATI,
1830                null,  // USCRIPT_ESTRANGELO_SYRIAC,
1831                null,  // USCRIPT_WESTERN_SYRIAC,
1832                null,  // USCRIPT_EASTERN_SYRIAC,
1833                null,  // USCRIPT_TENGWAR,
1834                VAI,
1835                null,  // USCRIPT_VISIBLE_SPEECH,
1836                CUNEIFORM,
1837                null,  // USCRIPT_UNWRITTEN_LANGUAGES,
1838                UNKNOWN,
1839                CARIAN,
1840                null,  // USCRIPT_JAPANESE,
1841                TAI_THAM,  // USCRIPT_LANNA (aka TAI_THAM),
1842                LYCIAN,
1843                LYDIAN,
1844                OL_CHIKI,
1845                REJANG,
1846                SAURASHTRA,
1847                null,  // USCRIPT_SIGN_WRITING,
1848                SUNDANESE,
1849                null,  // USCRIPT_MOON,
1850                MEETEI_MAYEK,  // USCRIPT_MEITEI_MAYEK (aka MEETEI, MEITHEI),
1851                IMPERIAL_ARAMAIC,
1852                AVESTAN,
1853                null,  // USCRIPT_CHAKMA,
1854                null,  // USCRIPT_KOREAN,
1855                KAITHI,
1856                null,  // USCRIPT_MANICHAEAN,
1857                INSCRIPTIONAL_PAHLAVI,
1858                null,  // USCRIPT_PSALTER_PAHLAVI,
1859                null,  // USCRIPT_BOOK_PAHLAVI,
1860                INSCRIPTIONAL_PARTHIAN,
1861                SAMARITAN,
1862                TAI_VIET,
1863                null,  // USCRIPT_MATHEMATICAL_NOTATION,
1864                null,  // USCRIPT_SYMBOLS,
1865                BAMUM,
1866                LISU,
1867                null,  // USCRIPT_NAKHI_GEBA,
1868                OLD_SOUTH_ARABIAN,
1869                null,  // USCRIPT_BASSA_VAH,
1870                null,  // USCRIPT_DUPLOYAN_SHORTAND,
1871                null,  // USCRIPT_ELBASAN,
1872                null,  // USCRIPT_GRANTHA,
1873                null,  // USCRIPT_KPELLE,
1874                null,  // USCRIPT_LOMA,
1875                null,  // USCRIPT_MENDE,
1876                null,  // USCRIPT_MEROITIC_CURSIVE,
1877                null,  // USCRIPT_OLD_NORTH_ARABIAN,
1878                null,  // USCRIPT_NABATAEAN,
1879                null,  // USCRIPT_PALMYRENE,
1880                null,  // USCRIPT_SINDHI,
1881                null,  // USCRIPT_WARANG_CITI,
1882                null,  // USCRIPT_AFAKA,
1883                null,  // USCRIPT_JURCHEN,
1884                null,  // USCRIPT_MRO,
1885                null,  // USCRIPT_NUSHU,
1886                null,  // USCRIPT_SHARADA,
1887                null,  // USCRIPT_SORA_SOMPENG,
1888                null,  // USCRIPT_TAKRI,
1889                null,  // USCRIPT_TANGUT,
1890                null,  // USCRIPT_WOLEAI,
1891                null,  // USCRIPT_ANATOLIAN_HIEROGLYPHS,
1892                null,  // USCRIPT_KHOJKI,
1893                null,  // USCRIPT_TIRHUTA,
1894        };
1895
1896        /**
1897         * Returns the {@link UnicodeScript} value identified by {@code scriptName}.
1898         * {@code scriptName} can be a ISO-15924 English script name
1899         * or an alias (ISO-15924 script code) for that name.
1900         * {@see http://www.unicode.org/iso15924/iso15924-codes.html}
1901         * Lookups are case insensitive.
1902         *
1903         * @throws NullPointerException if {@code scriptName} is null.
1904         * @throws IllegalAccessException if {@code scriptName} in invalid.
1905         *
1906         * @since 1.7
1907         */
1908        public static UnicodeScript forName(String scriptName) {
1909            if (scriptName == null) {
1910                throw new NullPointerException("scriptName == null");
1911            }
1912
1913            final int script = unicodeScriptForName(scriptName);
1914            if (script == -1 || script >= SCRIPTS.length ||
1915                    SCRIPTS[script] == null) {
1916                throw new IllegalArgumentException("Unknown script: " + scriptName);
1917            }
1918
1919            return SCRIPTS[script];
1920        }
1921
1922        /**
1923         * Returns the {@link UnicodeScript} value that the given Unicode code
1924         * point is assigned to.
1925         *
1926         * @throws IllegalArgumentException if {@codePoint} is not a valid Unicode code point.
1927         */
1928        public static UnicodeScript of(int codePoint) {
1929            checkValidCodePoint(codePoint);
1930            int script = unicodeScriptForCodePoint(codePoint);
1931            if (script == -1 || script >= SCRIPTS.length) {
1932                // This signifies an ICU error. Complain loudly instead of swallowing
1933                // the error up.
1934                throw new IllegalArgumentException("Invalid codePoint: " + codePoint);
1935            }
1936
1937            // This happens when ICU maps the code point to a script known to ICU but
1938            // not the Java API.
1939            if (SCRIPTS[script] == null) {
1940                return UNKNOWN;
1941            }
1942
1943            return SCRIPTS[script];
1944        }
1945    }
1946
    /**
     * Native lookup of a Unicode script by name. Despite the parameter's name,
     * the argument is a script name: {@code UnicodeScript.forName} passes its
     * {@code scriptName} here, treats a result of {@code -1} as unknown, and
     * otherwise uses the result as an index into {@code UnicodeScript.SCRIPTS}.
     */
    private static native int unicodeScriptForName(String blockName);
1948
    /**
     * Native lookup of the script a code point is assigned to.
     * {@code UnicodeScript.of(int)} validates the code point before calling this,
     * rejects a result of {@code -1} or one past the end of
     * {@code UnicodeScript.SCRIPTS}, and otherwise uses the result as an index
     * into that table.
     */
    private static native int unicodeScriptForCodePoint(int codePoint);
1950
1951
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value. When a fresh instance is not required, {@link #valueOf(char)} is
     * the recommended alternative because it serves values below 128 from a
     * cache instead of allocating.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
1962
    /**
     * Gets the primitive value of this character, i.e. the {@code char} stored
     * in this instance's {@code value} field.
     *
     * @return this object's primitive value.
     */
    public char charValue() {
        return value;
    }
1971
1972    private static void checkValidCodePoint(int codePoint) {
1973        if (!isValidCodePoint(codePoint)) {
1974            throw new IllegalArgumentException("Invalid code point: " + codePoint);
1975        }
1976    }
1977
1978    /**
1979     * Compares this object to the specified character object to determine their
1980     * relative order.
1981     *
1982     * @param c
1983     *            the character object to compare this object to.
1984     * @return {@code 0} if the value of this character and the value of
1985     *         {@code c} are equal; a positive value if the value of this
1986     *         character is greater than the value of {@code c}; a negative
1987     *         value if the value of this character is less than the value of
1988     *         {@code c}.
1989     * @see java.lang.Comparable
1990     * @since 1.2
1991     */
1992    public int compareTo(Character c) {
1993        return compare(value, c.value);
1994    }
1995
1996    /**
1997     * Compares two {@code char} values.
1998     * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
1999     * @since 1.7
2000     */
2001    public static int compare(char lhs, char rhs) {
2002        return lhs - rhs;
2003    }
2004
2005    /**
2006     * Returns a {@code Character} instance for the {@code char} value passed.
2007     * <p>
2008     * If it is not necessary to get a new {@code Character} instance, it is
2009     * recommended to use this method instead of the constructor, since it
2010     * maintains a cache of instances which may result in better performance.
2011     *
2012     * @param c
2013     *            the char value for which to get a {@code Character} instance.
2014     * @return the {@code Character} instance for {@code c}.
2015     * @since 1.5
2016     */
2017    public static Character valueOf(char c) {
2018        return c < 128 ? SMALL_VALUES[c] : new Character(c);
2019    }
2020
2021    /**
2022     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
2023     */
2024    private static final Character[] SMALL_VALUES = new Character[128];
2025
2026    static {
2027        for (int i = 0; i < 128; i++) {
2028            SMALL_VALUES[i] = new Character((char) i);
2029        }
2030    }
2031    /**
2032     * Indicates whether {@code codePoint} is a valid Unicode code point.
2033     *
2034     * @param codePoint
2035     *            the code point to test.
2036     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
2037     *         {@code false} otherwise.
2038     * @since 1.5
2039     */
2040    public static boolean isValidCodePoint(int codePoint) {
2041        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
2042    }
2043
2044    /**
2045     * Indicates whether {@code codePoint} is within the supplementary code
2046     * point range.
2047     *
2048     * @param codePoint
2049     *            the code point to test.
2050     * @return {@code true} if {@code codePoint} is within the supplementary
2051     *         code point range; {@code false} otherwise.
2052     * @since 1.5
2053     */
2054    public static boolean isSupplementaryCodePoint(int codePoint) {
2055        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
2056    }
2057
2058    /**
2059     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
2060     * that is used for representing supplementary characters in UTF-16
2061     * encoding.
2062     *
2063     * @param ch
2064     *            the character to test.
2065     * @return {@code true} if {@code ch} is a high-surrogate code unit;
2066     *         {@code false} otherwise.
2067     * @see #isLowSurrogate(char)
2068     * @since 1.5
2069     */
2070    public static boolean isHighSurrogate(char ch) {
2071        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
2072    }
2073
2074    /**
2075     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
2076     * that is used for representing supplementary characters in UTF-16
2077     * encoding.
2078     *
2079     * @param ch
2080     *            the character to test.
2081     * @return {@code true} if {@code ch} is a low-surrogate code unit;
2082     *         {@code false} otherwise.
2083     * @see #isHighSurrogate(char)
2084     * @since 1.5
2085     */
2086    public static boolean isLowSurrogate(char ch) {
2087        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
2088    }
2089
2090    /**
2091     * Returns true if the given character is a high or low surrogate.
2092     * @since 1.7
2093     */
2094    public static boolean isSurrogate(char ch) {
2095        return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
2096    }
2097
2098    /**
2099     * Indicates whether the specified character pair is a valid surrogate pair.
2100     *
2101     * @param high
2102     *            the high surrogate unit to test.
2103     * @param low
2104     *            the low surrogate unit to test.
2105     * @return {@code true} if {@code high} is a high-surrogate code unit and
2106     *         {@code low} is a low-surrogate code unit; {@code false}
2107     *         otherwise.
2108     * @see #isHighSurrogate(char)
2109     * @see #isLowSurrogate(char)
2110     * @since 1.5
2111     */
2112    public static boolean isSurrogatePair(char high, char low) {
2113        return (isHighSurrogate(high) && isLowSurrogate(low));
2114    }
2115
2116    /**
2117     * Calculates the number of {@code char} values required to represent the
2118     * specified Unicode code point. This method checks if the {@code codePoint}
2119     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
2120     * returned, otherwise {@code 1}. To test if the code point is valid, use
2121     * the {@link #isValidCodePoint(int)} method.
2122     *
2123     * @param codePoint
2124     *            the code point for which to calculate the number of required
2125     *            chars.
2126     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
2127     * @see #isValidCodePoint(int)
2128     * @see #isSupplementaryCodePoint(int)
2129     * @since 1.5
2130     */
2131    public static int charCount(int codePoint) {
2132        return (codePoint >= 0x10000 ? 2 : 1);
2133    }
2134
2135    /**
2136     * Converts a surrogate pair into a Unicode code point. This method assumes
2137     * that the pair are valid surrogates. If the pair are <i>not</i> valid
2138     * surrogates, then the result is indeterminate. The
2139     * {@link #isSurrogatePair(char, char)} method should be used prior to this
2140     * method to validate the pair.
2141     *
2142     * @param high
2143     *            the high surrogate unit.
2144     * @param low
2145     *            the low surrogate unit.
2146     * @return the Unicode code point corresponding to the surrogate unit pair.
2147     * @see #isSurrogatePair(char, char)
2148     * @since 1.5
2149     */
2150    public static int toCodePoint(char high, char low) {
2151        // See RFC 2781, Section 2.2
2152        // http://www.ietf.org/rfc/rfc2781.txt
2153        int h = (high & 0x3FF) << 10;
2154        int l = low & 0x3FF;
2155        return (h | l) + 0x10000;
2156    }
2157
2158    /**
2159     * Returns the code point at {@code index} in the specified sequence of
2160     * character units. If the unit at {@code index} is a high-surrogate unit,
2161     * {@code index + 1} is less than the length of the sequence and the unit at
2162     * {@code index + 1} is a low-surrogate unit, then the supplementary code
2163     * point represented by the pair is returned; otherwise the {@code char}
2164     * value at {@code index} is returned.
2165     *
2166     * @param seq
2167     *            the source sequence of {@code char} units.
2168     * @param index
2169     *            the position in {@code seq} from which to retrieve the code
2170     *            point.
2171     * @return the Unicode code point or {@code char} value at {@code index} in
2172     *         {@code seq}.
2173     * @throws NullPointerException
2174     *             if {@code seq} is {@code null}.
2175     * @throws IndexOutOfBoundsException
2176     *             if the {@code index} is negative or greater than or equal to
2177     *             the length of {@code seq}.
2178     * @since 1.5
2179     */
2180    public static int codePointAt(CharSequence seq, int index) {
2181        if (seq == null) {
2182            throw new NullPointerException("seq == null");
2183        }
2184        int len = seq.length();
2185        if (index < 0 || index >= len) {
2186            throw new IndexOutOfBoundsException();
2187        }
2188
2189        char high = seq.charAt(index++);
2190        if (index >= len) {
2191            return high;
2192        }
2193        char low = seq.charAt(index);
2194        if (isSurrogatePair(high, low)) {
2195            return toCodePoint(high, low);
2196        }
2197        return high;
2198    }
2199
2200    /**
2201     * Returns the code point at {@code index} in the specified array of
2202     * character units. If the unit at {@code index} is a high-surrogate unit,
2203     * {@code index + 1} is less than the length of the array and the unit at
2204     * {@code index + 1} is a low-surrogate unit, then the supplementary code
2205     * point represented by the pair is returned; otherwise the {@code char}
2206     * value at {@code index} is returned.
2207     *
2208     * @param seq
2209     *            the source array of {@code char} units.
2210     * @param index
2211     *            the position in {@code seq} from which to retrieve the code
2212     *            point.
2213     * @return the Unicode code point or {@code char} value at {@code index} in
2214     *         {@code seq}.
2215     * @throws NullPointerException
2216     *             if {@code seq} is {@code null}.
2217     * @throws IndexOutOfBoundsException
2218     *             if the {@code index} is negative or greater than or equal to
2219     *             the length of {@code seq}.
2220     * @since 1.5
2221     */
2222    public static int codePointAt(char[] seq, int index) {
2223        if (seq == null) {
2224            throw new NullPointerException("seq == null");
2225        }
2226        int len = seq.length;
2227        if (index < 0 || index >= len) {
2228            throw new IndexOutOfBoundsException();
2229        }
2230
2231        char high = seq[index++];
2232        if (index >= len) {
2233            return high;
2234        }
2235        char low = seq[index];
2236        if (isSurrogatePair(high, low)) {
2237            return toCodePoint(high, low);
2238        }
2239        return high;
2240    }
2241
2242    /**
2243     * Returns the code point at {@code index} in the specified array of
2244     * character units, where {@code index} has to be less than {@code limit}.
2245     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
2246     * is less than {@code limit} and the unit at {@code index + 1} is a
2247     * low-surrogate unit, then the supplementary code point represented by the
2248     * pair is returned; otherwise the {@code char} value at {@code index} is
2249     * returned.
2250     *
2251     * @param seq
2252     *            the source array of {@code char} units.
2253     * @param index
2254     *            the position in {@code seq} from which to get the code point.
2255     * @param limit
2256     *            the index after the last unit in {@code seq} that can be used.
2257     * @return the Unicode code point or {@code char} value at {@code index} in
2258     *         {@code seq}.
2259     * @throws NullPointerException
2260     *             if {@code seq} is {@code null}.
2261     * @throws IndexOutOfBoundsException
2262     *             if {@code index < 0}, {@code index >= limit},
2263     *             {@code limit < 0} or if {@code limit} is greater than the
2264     *             length of {@code seq}.
2265     * @since 1.5
2266     */
2267    public static int codePointAt(char[] seq, int index, int limit) {
2268        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
2269            throw new IndexOutOfBoundsException();
2270        }
2271
2272        char high = seq[index++];
2273        if (index >= limit) {
2274            return high;
2275        }
2276        char low = seq[index];
2277        if (isSurrogatePair(high, low)) {
2278            return toCodePoint(high, low);
2279        }
2280        return high;
2281    }
2282
2283    /**
2284     * Returns the code point that precedes {@code index} in the specified
2285     * sequence of character units. If the unit at {@code index - 1} is a
2286     * low-surrogate unit, {@code index - 2} is not negative and the unit at
2287     * {@code index - 2} is a high-surrogate unit, then the supplementary code
2288     * point represented by the pair is returned; otherwise the {@code char}
2289     * value at {@code index - 1} is returned.
2290     *
2291     * @param seq
2292     *            the source sequence of {@code char} units.
2293     * @param index
2294     *            the position in {@code seq} following the code
2295     *            point that should be returned.
2296     * @return the Unicode code point or {@code char} value before {@code index}
2297     *         in {@code seq}.
2298     * @throws NullPointerException
2299     *             if {@code seq} is {@code null}.
2300     * @throws IndexOutOfBoundsException
2301     *             if the {@code index} is less than 1 or greater than the
2302     *             length of {@code seq}.
2303     * @since 1.5
2304     */
2305    public static int codePointBefore(CharSequence seq, int index) {
2306        if (seq == null) {
2307            throw new NullPointerException("seq == null");
2308        }
2309        int len = seq.length();
2310        if (index < 1 || index > len) {
2311            throw new IndexOutOfBoundsException();
2312        }
2313
2314        char low = seq.charAt(--index);
2315        if (--index < 0) {
2316            return low;
2317        }
2318        char high = seq.charAt(index);
2319        if (isSurrogatePair(high, low)) {
2320            return toCodePoint(high, low);
2321        }
2322        return low;
2323    }
2324
2325    /**
2326     * Returns the code point that precedes {@code index} in the specified
2327     * array of character units. If the unit at {@code index - 1} is a
2328     * low-surrogate unit, {@code index - 2} is not negative and the unit at
2329     * {@code index - 2} is a high-surrogate unit, then the supplementary code
2330     * point represented by the pair is returned; otherwise the {@code char}
2331     * value at {@code index - 1} is returned.
2332     *
2333     * @param seq
2334     *            the source array of {@code char} units.
2335     * @param index
2336     *            the position in {@code seq} following the code
2337     *            point that should be returned.
2338     * @return the Unicode code point or {@code char} value before {@code index}
2339     *         in {@code seq}.
2340     * @throws NullPointerException
2341     *             if {@code seq} is {@code null}.
2342     * @throws IndexOutOfBoundsException
2343     *             if the {@code index} is less than 1 or greater than the
2344     *             length of {@code seq}.
2345     * @since 1.5
2346     */
2347    public static int codePointBefore(char[] seq, int index) {
2348        if (seq == null) {
2349            throw new NullPointerException("seq == null");
2350        }
2351        int len = seq.length;
2352        if (index < 1 || index > len) {
2353            throw new IndexOutOfBoundsException();
2354        }
2355
2356        char low = seq[--index];
2357        if (--index < 0) {
2358            return low;
2359        }
2360        char high = seq[index];
2361        if (isSurrogatePair(high, low)) {
2362            return toCodePoint(high, low);
2363        }
2364        return low;
2365    }
2366
2367    /**
2368     * Returns the code point that precedes the {@code index} in the specified
2369     * array of character units and is not less than {@code start}. If the unit
2370     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
2371     * less than {@code start} and the unit at {@code index - 2} is a
2372     * high-surrogate unit, then the supplementary code point represented by the
2373     * pair is returned; otherwise the {@code char} value at {@code index - 1}
2374     * is returned.
2375     *
2376     * @param seq
2377     *            the source array of {@code char} units.
2378     * @param index
2379     *            the position in {@code seq} following the code point that
2380     *            should be returned.
2381     * @param start
2382     *            the index of the first element in {@code seq}.
2383     * @return the Unicode code point or {@code char} value before {@code index}
2384     *         in {@code seq}.
2385     * @throws NullPointerException
2386     *             if {@code seq} is {@code null}.
2387     * @throws IndexOutOfBoundsException
2388     *             if the {@code index <= start}, {@code start < 0},
2389     *             {@code index} is greater than the length of {@code seq}, or
2390     *             if {@code start} is equal or greater than the length of
2391     *             {@code seq}.
2392     * @since 1.5
2393     */
2394    public static int codePointBefore(char[] seq, int index, int start) {
2395        if (seq == null) {
2396            throw new NullPointerException("seq == null");
2397        }
2398        int len = seq.length;
2399        if (index <= start || index > len || start < 0 || start >= len) {
2400            throw new IndexOutOfBoundsException();
2401        }
2402
2403        char low = seq[--index];
2404        if (--index < start) {
2405            return low;
2406        }
2407        char high = seq[index];
2408        if (isSurrogatePair(high, low)) {
2409            return toCodePoint(high, low);
2410        }
2411        return low;
2412    }
2413
2414    /**
2415     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2416     * and copies the value(s) into the char array {@code dst}, starting at
2417     * index {@code dstIndex}.
2418     *
2419     * @param codePoint
2420     *            the Unicode code point to encode.
2421     * @param dst
2422     *            the destination array to copy the encoded value into.
2423     * @param dstIndex
2424     *            the index in {@code dst} from where to start copying.
2425     * @return the number of {@code char} value units copied into {@code dst}.
2426     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2427     * @throws NullPointerException
2428     *             if {@code dst} is {@code null}.
2429     * @throws IndexOutOfBoundsException
2430     *             if {@code dstIndex} is negative, greater than or equal to
2431     *             {@code dst.length} or equals {@code dst.length - 1} when
2432     *             {@code codePoint} is a
2433     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2434     * @since 1.5
2435     */
2436    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2437        checkValidCodePoint(codePoint);
2438        if (dst == null) {
2439            throw new NullPointerException("dst == null");
2440        }
2441        if (dstIndex < 0 || dstIndex >= dst.length) {
2442            throw new IndexOutOfBoundsException();
2443        }
2444
2445        if (isSupplementaryCodePoint(codePoint)) {
2446            if (dstIndex == dst.length - 1) {
2447                throw new IndexOutOfBoundsException();
2448            }
2449            // See RFC 2781, Section 2.1
2450            // http://www.ietf.org/rfc/rfc2781.txt
2451            int cpPrime = codePoint - 0x10000;
2452            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2453            int low = 0xDC00 | (cpPrime & 0x3FF);
2454            dst[dstIndex] = (char) high;
2455            dst[dstIndex + 1] = (char) low;
2456            return 2;
2457        }
2458
2459        dst[dstIndex] = (char) codePoint;
2460        return 1;
2461    }
2462
2463    /**
2464     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2465     * and returns it as a char array.
2466     *
2467     * @param codePoint
2468     *            the Unicode code point to encode.
2469     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2470     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2471     *         then the returned array contains two characters, otherwise it
2472     *         contains just one character.
2473     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2474     * @since 1.5
2475     */
2476    public static char[] toChars(int codePoint) {
2477        checkValidCodePoint(codePoint);
2478        if (isSupplementaryCodePoint(codePoint)) {
2479            int cpPrime = codePoint - 0x10000;
2480            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2481            int low = 0xDC00 | (cpPrime & 0x3FF);
2482            return new char[] { (char) high, (char) low };
2483        }
2484        return new char[] { (char) codePoint };
2485    }
2486
2487    /**
2488     * Counts the number of Unicode code points in the subsequence of the
2489     * specified character sequence, as delineated by {@code beginIndex} and
2490     * {@code endIndex}. Any surrogate values with missing pair values will be
2491     * counted as one code point.
2492     *
2493     * @param seq
2494     *            the {@code CharSequence} to look through.
2495     * @param beginIndex
2496     *            the inclusive index to begin counting at.
2497     * @param endIndex
2498     *            the exclusive index to stop counting at.
2499     * @return the number of Unicode code points.
2500     * @throws NullPointerException
2501     *             if {@code seq} is {@code null}.
2502     * @throws IndexOutOfBoundsException
2503     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2504     *             if {@code endIndex} is greater than the length of {@code seq}.
2505     * @since 1.5
2506     */
2507    public static int codePointCount(CharSequence seq, int beginIndex,
2508            int endIndex) {
2509        if (seq == null) {
2510            throw new NullPointerException("seq == null");
2511        }
2512        int len = seq.length();
2513        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2514            throw new IndexOutOfBoundsException();
2515        }
2516
2517        int result = 0;
2518        for (int i = beginIndex; i < endIndex; i++) {
2519            char c = seq.charAt(i);
2520            if (isHighSurrogate(c)) {
2521                if (++i < endIndex) {
2522                    c = seq.charAt(i);
2523                    if (!isLowSurrogate(c)) {
2524                        result++;
2525                    }
2526                }
2527            }
2528            result++;
2529        }
2530        return result;
2531    }
2532
2533    /**
2534     * Counts the number of Unicode code points in the subsequence of the
2535     * specified char array, as delineated by {@code offset} and {@code count}.
2536     * Any surrogate values with missing pair values will be counted as one code
2537     * point.
2538     *
2539     * @param seq
2540     *            the char array to look through
2541     * @param offset
2542     *            the inclusive index to begin counting at.
2543     * @param count
2544     *            the number of {@code char} values to look through in
2545     *            {@code seq}.
2546     * @return the number of Unicode code points.
2547     * @throws NullPointerException
2548     *             if {@code seq} is {@code null}.
2549     * @throws IndexOutOfBoundsException
2550     *             if {@code offset < 0}, {@code count < 0} or if
2551     *             {@code offset + count} is greater than the length of
2552     *             {@code seq}.
2553     * @since 1.5
2554     */
2555    public static int codePointCount(char[] seq, int offset, int count) {
2556        Arrays.checkOffsetAndCount(seq.length, offset, count);
2557        int endIndex = offset + count;
2558        int result = 0;
2559        for (int i = offset; i < endIndex; i++) {
2560            char c = seq[i];
2561            if (isHighSurrogate(c)) {
2562                if (++i < endIndex) {
2563                    c = seq[i];
2564                    if (!isLowSurrogate(c)) {
2565                        result++;
2566                    }
2567                }
2568            }
2569            result++;
2570        }
2571        return result;
2572    }
2573
2574    /**
2575     * Determines the index in the specified character sequence that is offset
2576     * {@code codePointOffset} code points from {@code index}.
2577     *
2578     * @param seq
2579     *            the character sequence to find the index in.
2580     * @param index
2581     *            the start index in {@code seq}.
2582     * @param codePointOffset
2583     *            the number of code points to look backwards or forwards; may
2584     *            be a negative or positive value.
2585     * @return the index in {@code seq} that is {@code codePointOffset} code
2586     *         points away from {@code index}.
2587     * @throws NullPointerException
2588     *             if {@code seq} is {@code null}.
2589     * @throws IndexOutOfBoundsException
2590     *             if {@code index < 0}, {@code index} is greater than the
2591     *             length of {@code seq}, or if there are not enough values in
2592     *             {@code seq} to skip {@code codePointOffset} code points
2593     *             forwards or backwards (if {@code codePointOffset} is
2594     *             negative) from {@code index}.
2595     * @since 1.5
2596     */
2597    public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2598        if (seq == null) {
2599            throw new NullPointerException("seq == null");
2600        }
2601        int len = seq.length();
2602        if (index < 0 || index > len) {
2603            throw new IndexOutOfBoundsException();
2604        }
2605
2606        if (codePointOffset == 0) {
2607            return index;
2608        }
2609
2610        if (codePointOffset > 0) {
2611            int codePoints = codePointOffset;
2612            int i = index;
2613            while (codePoints > 0) {
2614                codePoints--;
2615                if (i >= len) {
2616                    throw new IndexOutOfBoundsException();
2617                }
2618                if (isHighSurrogate(seq.charAt(i))) {
2619                    int next = i + 1;
2620                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2621                        i++;
2622                    }
2623                }
2624                i++;
2625            }
2626            return i;
2627        }
2628
2629        int codePoints = -codePointOffset;
2630        int i = index;
2631        while (codePoints > 0) {
2632            codePoints--;
2633            i--;
2634            if (i < 0) {
2635                throw new IndexOutOfBoundsException();
2636            }
2637            if (isLowSurrogate(seq.charAt(i))) {
2638                int prev = i - 1;
2639                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2640                    i--;
2641                }
2642            }
2643        }
2644        return i;
2645    }
2646
2647    /**
2648     * Determines the index in a subsequence of the specified character array
2649     * that is offset {@code codePointOffset} code points from {@code index}.
2650     * The subsequence is delineated by {@code start} and {@code count}.
2651     *
2652     * @param seq
2653     *            the character array to find the index in.
2654     * @param start
2655     *            the inclusive index that marks the beginning of the
2656     *            subsequence.
2657     * @param count
2658     *            the number of {@code char} values to include within the
2659     *            subsequence.
2660     * @param index
2661     *            the start index in the subsequence of the char array.
2662     * @param codePointOffset
2663     *            the number of code points to look backwards or forwards; may
2664     *            be a negative or positive value.
2665     * @return the index in {@code seq} that is {@code codePointOffset} code
2666     *         points away from {@code index}.
2667     * @throws NullPointerException
2668     *             if {@code seq} is {@code null}.
2669     * @throws IndexOutOfBoundsException
2670     *             if {@code start < 0}, {@code count < 0},
2671     *             {@code index < start}, {@code index > start + count},
2672     *             {@code start + count} is greater than the length of
2673     *             {@code seq}, or if there are not enough values in
2674     *             {@code seq} to skip {@code codePointOffset} code points
2675     *             forward or backward (if {@code codePointOffset} is
2676     *             negative) from {@code index}.
2677     * @since 1.5
2678     */
2679    public static int offsetByCodePoints(char[] seq, int start, int count,
2680            int index, int codePointOffset) {
2681        Arrays.checkOffsetAndCount(seq.length, start, count);
2682        int end = start + count;
2683        if (index < start || index > end) {
2684            throw new IndexOutOfBoundsException();
2685        }
2686
2687        if (codePointOffset == 0) {
2688            return index;
2689        }
2690
2691        if (codePointOffset > 0) {
2692            int codePoints = codePointOffset;
2693            int i = index;
2694            while (codePoints > 0) {
2695                codePoints--;
2696                if (i >= end) {
2697                    throw new IndexOutOfBoundsException();
2698                }
2699                if (isHighSurrogate(seq[i])) {
2700                    int next = i + 1;
2701                    if (next < end && isLowSurrogate(seq[next])) {
2702                        i++;
2703                    }
2704                }
2705                i++;
2706            }
2707            return i;
2708        }
2709
2710        int codePoints = -codePointOffset;
2711        int i = index;
2712        while (codePoints > 0) {
2713            codePoints--;
2714            i--;
2715            if (i < start) {
2716                throw new IndexOutOfBoundsException();
2717            }
2718            if (isLowSurrogate(seq[i])) {
2719                int prev = i - 1;
2720                if (prev >= start && isHighSurrogate(seq[prev])) {
2721                    i--;
2722                }
2723            }
2724        }
2725        return i;
2726    }
2727
2728    /**
2729     * Convenience method to determine the value of the specified character
2730     * {@code c} in the supplied radix. The value of {@code radix} must be
2731     * between MIN_RADIX and MAX_RADIX.
2732     *
2733     * @param c
2734     *            the character to determine the value of.
2735     * @param radix
2736     *            the radix.
2737     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2738     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2739     */
2740    public static int digit(char c, int radix) {
2741        return digit((int) c, radix);
2742    }
2743
2744    /**
2745     * Convenience method to determine the value of the character
2746     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2747     * be between MIN_RADIX and MAX_RADIX.
2748     *
2749     * @param codePoint
2750     *            the character, including supplementary characters.
2751     * @param radix
2752     *            the radix.
2753     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2754     *         {@link #MAX_RADIX} then the value of the character in the radix;
2755     *         -1 otherwise.
2756     */
2757    public static int digit(int codePoint, int radix) {
2758        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2759            return -1;
2760        }
2761        if (codePoint < 128) {
2762            // Optimized for ASCII
2763            int result = -1;
2764            if ('0' <= codePoint && codePoint <= '9') {
2765                result = codePoint - '0';
2766            } else if ('a' <= codePoint && codePoint <= 'z') {
2767                result = 10 + (codePoint - 'a');
2768            } else if ('A' <= codePoint && codePoint <= 'Z') {
2769                result = 10 + (codePoint - 'A');
2770            }
2771            return result < radix ? result : -1;
2772        }
2773        return digitImpl(codePoint, radix);
2774    }
2775
2776    private static native int digitImpl(int codePoint, int radix);
2777
2778    /**
2779     * Compares this object with the specified object and indicates if they are
2780     * equal. In order to be equal, {@code object} must be an instance of
2781     * {@code Character} and have the same char value as this object.
2782     *
2783     * @param object
2784     *            the object to compare this double with.
2785     * @return {@code true} if the specified object is equal to this
2786     *         {@code Character}; {@code false} otherwise.
2787     */
2788    @Override
2789    public boolean equals(Object object) {
2790        return (object instanceof Character) && (((Character) object).value == value);
2791    }
2792
2793    /**
2794     * Returns the character which represents the specified digit in the
2795     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2796     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2797     * smaller than {@code radix}. If any of these conditions does not hold, 0
2798     * is returned.
2799     *
2800     * @param digit
2801     *            the integer value.
2802     * @param radix
2803     *            the radix.
2804     * @return the character which represents the {@code digit} in the
2805     *         {@code radix}.
2806     */
2807    public static char forDigit(int digit, int radix) {
2808        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2809            if (digit >= 0 && digit < radix) {
2810                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2811            }
2812        }
2813        return 0;
2814    }
2815
2816    /**
2817     * Returns a human-readable name for the given code point,
2818     * or null if the code point is unassigned.
2819     *
2820     * <p>As a fallback mechanism this method returns strings consisting of the Unicode
2821     * block name (with underscores replaced by spaces), a single space, and the uppercase
2822     * hex value of the code point, using as few digits as necessary.
2823     *
2824     * <p>Examples:
2825     * <ul>
2826     * <li>{@code Character.getName(0)} returns "NULL".
2827     * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
2828     * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
2829     * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
2830     * </ul>
2831     *
2832     * <p>Note that the exact strings returned will vary from release to release.
2833     *
2834     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2835     * @since 1.7
2836     */
2837    public static String getName(int codePoint) {
2838        checkValidCodePoint(codePoint);
2839        if (getType(codePoint) == Character.UNASSIGNED) {
2840            return null;
2841        }
2842        String result = getNameImpl(codePoint);
2843        if (result == null) {
2844            String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
2845            result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
2846        }
2847        return result;
2848    }
2849
2850    private static native String getNameImpl(int codePoint);
2851
2852    /**
2853     * Returns the numeric value of the specified Unicode character.
2854     * See {@link #getNumericValue(int)}.
2855     *
2856     * @param c the character
2857     * @return a non-negative numeric integer value if a numeric value for
2858     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2859     *         -2 if the numeric value can not be represented as an integer.
2860     */
2861    public static int getNumericValue(char c) {
2862        return getNumericValue((int) c);
2863    }
2864
2865    /**
2866     * Gets the numeric value of the specified Unicode code point. For example,
2867     * the code point '\u216B' stands for the Roman number XII, which has the
2868     * numeric value 12.
2869     *
2870     * <p>There are two points of divergence between this method and the Unicode
2871     * specification. This method treats the letters a-z (in both upper and lower
2872     * cases, and their full-width variants) as numbers from 10 to 35. The
2873     * Unicode specification also supports the idea of code points with non-integer
2874     * numeric values; this method does not (except to the extent of returning -2
2875     * for such code points).
2876     *
2877     * @param codePoint the code point
2878     * @return a non-negative numeric integer value if a numeric value for
2879     *         {@code codePoint} exists, -1 if there is no numeric value for
2880     *         {@code codePoint}, -2 if the numeric value can not be
2881     *         represented with an integer.
2882     */
2883    public static int getNumericValue(int codePoint) {
2884        // This is both an optimization and papers over differences between Java and ICU.
2885        if (codePoint < 128) {
2886            if (codePoint >= '0' && codePoint <= '9') {
2887                return codePoint - '0';
2888            }
2889            if (codePoint >= 'a' && codePoint <= 'z') {
2890                return codePoint - ('a' - 10);
2891            }
2892            if (codePoint >= 'A' && codePoint <= 'Z') {
2893                return codePoint - ('A' - 10);
2894            }
2895            return -1;
2896        }
2897        // Full-width uppercase A-Z.
2898        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2899            return codePoint - 0xff17;
2900        }
2901        // Full-width lowercase a-z.
2902        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2903            return codePoint - 0xff37;
2904        }
2905        return getNumericValueImpl(codePoint);
2906    }
2907
2908    private static native int getNumericValueImpl(int codePoint);
2909
2910    /**
2911     * Gets the general Unicode category of the specified character.
2912     *
2913     * @param c
2914     *            the character to get the category of.
2915     * @return the Unicode category of {@code c}.
2916     */
2917    public static int getType(char c) {
2918        return getType((int) c);
2919    }
2920
2921    /**
2922     * Gets the general Unicode category of the specified code point.
2923     *
2924     * @param codePoint
2925     *            the Unicode code point to get the category of.
2926     * @return the Unicode category of {@code codePoint}.
2927     */
2928    public static int getType(int codePoint) {
2929        int type = getTypeImpl(codePoint);
2930        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2931        if (type <= Character.FORMAT) {
2932            return type;
2933        }
2934        return (type + 1);
2935    }
2936
2937    private static native int getTypeImpl(int codePoint);
2938
2939    /**
2940     * Gets the Unicode directionality of the specified character.
2941     *
2942     * @param c
2943     *            the character to get the directionality of.
2944     * @return the Unicode directionality of {@code c}.
2945     */
2946    public static byte getDirectionality(char c) {
2947        return getDirectionality((int)c);
2948    }
2949
2950    /**
2951     * Gets the Unicode directionality of the specified character.
2952     *
2953     * @param codePoint
2954     *            the Unicode code point to get the directionality of.
2955     * @return the Unicode directionality of {@code codePoint}.
2956     */
2957    public static byte getDirectionality(int codePoint) {
2958        if (getType(codePoint) == Character.UNASSIGNED) {
2959            return Character.DIRECTIONALITY_UNDEFINED;
2960        }
2961
2962        byte directionality = getDirectionalityImpl(codePoint);
2963        if (directionality == -1) {
2964            return -1;
2965        }
2966        return DIRECTIONALITY[directionality];
2967    }
2968
2969    private static native byte getDirectionalityImpl(int codePoint);
2970
2971    /**
2972     * Indicates whether the specified character is mirrored.
2973     *
2974     * @param c
2975     *            the character to check.
2976     * @return {@code true} if {@code c} is mirrored; {@code false}
2977     *         otherwise.
2978     */
2979    public static boolean isMirrored(char c) {
2980        return isMirrored((int) c);
2981    }
2982
2983    /**
2984     * Indicates whether the specified code point is mirrored.
2985     *
2986     * @param codePoint
2987     *            the code point to check.
2988     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2989     *         otherwise.
2990     */
2991    public static boolean isMirrored(int codePoint) {
2992        return isMirroredImpl(codePoint);
2993    }
2994
2995    private static native boolean isMirroredImpl(int codePoint);
2996
2997    @Override
2998    public int hashCode() {
2999        return value;
3000    }
3001
3002    /**
3003     * Returns the high surrogate for the given code point. The result is meaningless if
3004     * the given code point is not a supplementary character.
3005     * @since 1.7
3006     */
3007    public static char highSurrogate(int codePoint) {
3008        return (char) ((codePoint >> 10) + 0xd7c0);
3009    }
3010
3011    /**
3012     * Returns the low surrogate for the given code point. The result is meaningless if
3013     * the given code point is not a supplementary character.
3014     * @since 1.7
3015     */
3016    public static char lowSurrogate(int codePoint) {
3017        return (char) ((codePoint & 0x3ff) | 0xdc00);
3018    }
3019
    /**
     * Returns true if the given code point is alphabetic. That is,
     * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories.
     * This method is implemented natively.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is alphabetic; {@code false}
     *         otherwise.
     * @since 1.7
     */
    public static native boolean isAlphabetic(int codePoint);
3026
3027    /**
3028     * Returns true if the given code point is in the Basic Multilingual Plane (BMP).
3029     * Such code points can be represented by a single {@code char}.
3030     * @since 1.7
3031     */
3032    public static boolean isBmpCodePoint(int codePoint) {
3033       return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE;
3034    }
3035
3036    /**
3037     * Indicates whether the specified character is defined in the Unicode
3038     * specification.
3039     *
3040     * @param c
3041     *            the character to check.
3042     * @return {@code true} if the general Unicode category of the character is
3043     *         not {@code UNASSIGNED}; {@code false} otherwise.
3044     */
3045    public static boolean isDefined(char c) {
3046        return isDefinedImpl(c);
3047    }
3048
3049    /**
3050     * Indicates whether the specified code point is defined in the Unicode
3051     * specification.
3052     *
3053     * @param codePoint
3054     *            the code point to check.
3055     * @return {@code true} if the general Unicode category of the code point is
3056     *         not {@code UNASSIGNED}; {@code false} otherwise.
3057     */
3058    public static boolean isDefined(int codePoint) {
3059        return isDefinedImpl(codePoint);
3060    }
3061
3062    private static native boolean isDefinedImpl(int codePoint);
3063
3064    /**
3065     * Indicates whether the specified character is a digit.
3066     *
3067     * @param c
3068     *            the character to check.
3069     * @return {@code true} if {@code c} is a digit; {@code false}
3070     *         otherwise.
3071     */
3072    public static boolean isDigit(char c) {
3073        return isDigit((int) c);
3074    }
3075
3076    /**
3077     * Indicates whether the specified code point is a digit.
3078     *
3079     * @param codePoint
3080     *            the code point to check.
3081     * @return {@code true} if {@code codePoint} is a digit; {@code false}
3082     *         otherwise.
3083     */
3084    public static boolean isDigit(int codePoint) {
3085        // Optimized case for ASCII
3086        if ('0' <= codePoint && codePoint <= '9') {
3087            return true;
3088        }
3089        if (codePoint < 1632) {
3090            return false;
3091        }
3092        return isDigitImpl(codePoint);
3093    }
3094
3095    private static native boolean isDigitImpl(int codePoint);
3096
3097    /**
3098     * Indicates whether the specified character is ignorable in a Java or
3099     * Unicode identifier.
3100     *
3101     * @param c
3102     *            the character to check.
3103     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
3104     */
3105    public static boolean isIdentifierIgnorable(char c) {
3106        return isIdentifierIgnorable((int) c);
3107    }
3108
    /**
     * Returns true if the given code point is a CJKV ideographic character.
     *
     * <p>This method is declared {@code native}; it has no Java body.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is ideographic; {@code false}
     *         otherwise.
     * @since 1.7
     */
    public static native boolean isIdeographic(int codePoint);
3114
3115    /**
3116     * Indicates whether the specified code point is ignorable in a Java or
3117     * Unicode identifier.
3118     *
3119     * @param codePoint
3120     *            the code point to check.
3121     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
3122     *         otherwise.
3123     */
3124    public static boolean isIdentifierIgnorable(int codePoint) {
3125        // This is both an optimization and papers over differences between Java and ICU.
3126        if (codePoint < 0x600) {
3127            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
3128                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
3129        }
3130        return isIdentifierIgnorableImpl(codePoint);
3131    }
3132
3133    private static native boolean isIdentifierIgnorableImpl(int codePoint);
3134
3135    /**
3136     * Indicates whether the specified character is an ISO control character.
3137     *
3138     * @param c
3139     *            the character to check.
3140     * @return {@code true} if {@code c} is an ISO control character;
3141     *         {@code false} otherwise.
3142     */
3143    public static boolean isISOControl(char c) {
3144        return isISOControl((int) c);
3145    }
3146
3147    /**
3148     * Indicates whether the specified code point is an ISO control character.
3149     *
3150     * @param c
3151     *            the code point to check.
3152     * @return {@code true} if {@code c} is an ISO control character;
3153     *         {@code false} otherwise.
3154     */
3155    public static boolean isISOControl(int c) {
3156        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
3157    }
3158
3159    /**
3160     * Indicates whether the specified character is a valid part of a Java
3161     * identifier other than the first character.
3162     *
3163     * @param c
3164     *            the character to check.
3165     * @return {@code true} if {@code c} is valid as part of a Java identifier;
3166     *         {@code false} otherwise.
3167     */
3168    public static boolean isJavaIdentifierPart(char c) {
3169        return isJavaIdentifierPart((int) c);
3170    }
3171
3172    /**
3173     * Indicates whether the specified code point is a valid part of a Java
3174     * identifier other than the first character.
3175     *
3176     * @param codePoint
3177     *            the code point to check.
3178     * @return {@code true} if {@code c} is valid as part of a Java identifier;
3179     *         {@code false} otherwise.
3180     */
3181    public static boolean isJavaIdentifierPart(int codePoint) {
3182        // Use precomputed bitmasks to optimize the ASCII range.
3183        if (codePoint < 64) {
3184            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
3185        } else if (codePoint < 128) {
3186            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
3187        }
3188        int type = getType(codePoint);
3189        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3190                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
3191                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
3192                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
3193                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
3194                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
3195    }
3196
3197    /**
3198     * Indicates whether the specified character is a valid first character for
3199     * a Java identifier.
3200     *
3201     * @param c
3202     *            the character to check.
3203     * @return {@code true} if {@code c} is a valid first character of a Java
3204     *         identifier; {@code false} otherwise.
3205     */
3206    public static boolean isJavaIdentifierStart(char c) {
3207        return isJavaIdentifierStart((int) c);
3208    }
3209
3210    /**
3211     * Indicates whether the specified code point is a valid first character for
3212     * a Java identifier.
3213     *
3214     * @param codePoint
3215     *            the code point to check.
3216     * @return {@code true} if {@code codePoint} is a valid start of a Java
3217     *         identifier; {@code false} otherwise.
3218     */
3219    public static boolean isJavaIdentifierStart(int codePoint) {
3220        // Use precomputed bitmasks to optimize the ASCII range.
3221        if (codePoint < 64) {
3222            return (codePoint == '$'); // There's only one character in this range.
3223        } else if (codePoint < 128) {
3224            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
3225        }
3226        int type = getType(codePoint);
3227        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
3228                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
3229    }
3230
3231    /**
3232     * Indicates whether the specified character is a Java letter.
3233     *
3234     * @param c
3235     *            the character to check.
3236     * @return {@code true} if {@code c} is a Java letter; {@code false}
3237     *         otherwise.
3238     * @deprecated Use {@link #isJavaIdentifierStart(char)} instead.
3239     */
3240    @Deprecated
3241    public static boolean isJavaLetter(char c) {
3242        return isJavaIdentifierStart(c);
3243    }
3244
3245    /**
3246     * Indicates whether the specified character is a Java letter or digit
3247     * character.
3248     *
3249     * @param c
3250     *            the character to check.
3251     * @return {@code true} if {@code c} is a Java letter or digit;
3252     *         {@code false} otherwise.
3253     * @deprecated Use {@link #isJavaIdentifierPart(char)} instead.
3254     */
3255    @Deprecated
3256    public static boolean isJavaLetterOrDigit(char c) {
3257        return isJavaIdentifierPart(c);
3258    }
3259
3260    /**
3261     * Indicates whether the specified character is a letter.
3262     *
3263     * @param c
3264     *            the character to check.
3265     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
3266     */
3267    public static boolean isLetter(char c) {
3268        return isLetter((int) c);
3269    }
3270
3271    /**
3272     * Indicates whether the specified code point is a letter.
3273     *
3274     * @param codePoint
3275     *            the code point to check.
3276     * @return {@code true} if {@code codePoint} is a letter; {@code false}
3277     *         otherwise.
3278     */
3279    public static boolean isLetter(int codePoint) {
3280        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
3281            return true;
3282        }
3283        if (codePoint < 128) {
3284            return false;
3285        }
3286        return isLetterImpl(codePoint);
3287    }
3288
3289    private static native boolean isLetterImpl(int codePoint);
3290
3291    /**
3292     * Indicates whether the specified character is a letter or a digit.
3293     *
3294     * @param c
3295     *            the character to check.
3296     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
3297     *         otherwise.
3298     */
3299    public static boolean isLetterOrDigit(char c) {
3300        return isLetterOrDigit((int) c);
3301    }
3302
3303    /**
3304     * Indicates whether the specified code point is a letter or a digit.
3305     *
3306     * @param codePoint
3307     *            the code point to check.
3308     * @return {@code true} if {@code codePoint} is a letter or a digit;
3309     *         {@code false} otherwise.
3310     */
3311    public static boolean isLetterOrDigit(int codePoint) {
3312        // Optimized case for ASCII
3313        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
3314            return true;
3315        }
3316        if ('0' <= codePoint && codePoint <= '9') {
3317            return true;
3318        }
3319        if (codePoint < 128) {
3320            return false;
3321        }
3322        return isLetterOrDigitImpl(codePoint);
3323    }
3324
3325    private static native boolean isLetterOrDigitImpl(int codePoint);
3326
3327    /**
3328     * Indicates whether the specified character is a lower case letter.
3329     *
3330     * @param c
3331     *            the character to check.
3332     * @return {@code true} if {@code c} is a lower case letter; {@code false}
3333     *         otherwise.
3334     */
3335    public static boolean isLowerCase(char c) {
3336        return isLowerCase((int) c);
3337    }
3338
3339    /**
3340     * Indicates whether the specified code point is a lower case letter.
3341     *
3342     * @param codePoint
3343     *            the code point to check.
3344     * @return {@code true} if {@code codePoint} is a lower case letter;
3345     *         {@code false} otherwise.
3346     */
3347    public static boolean isLowerCase(int codePoint) {
3348        // Optimized case for ASCII
3349        if ('a' <= codePoint && codePoint <= 'z') {
3350            return true;
3351        }
3352        if (codePoint < 128) {
3353            return false;
3354        }
3355        return isLowerCaseImpl(codePoint);
3356    }
3357
3358    private static native boolean isLowerCaseImpl(int codePoint);
3359
3360    /**
3361     * Use {@link #isWhitespace(char)} instead.
3362     * @deprecated Use {@link #isWhitespace(char)} instead.
3363     */
3364    @Deprecated
3365    public static boolean isSpace(char c) {
3366        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
3367    }
3368
3369    /**
3370     * See {@link #isSpaceChar(int)}.
3371     */
3372    public static boolean isSpaceChar(char c) {
3373        return isSpaceChar((int) c);
3374    }
3375
3376    /**
3377     * Returns true if the given code point is a Unicode space character.
3378     * The exact set of characters considered as whitespace varies with Unicode version.
3379     * Note that non-breaking spaces are considered whitespace.
3380     * Note also that line separators are not considered whitespace; see {@link #isWhitespace}
3381     * for an alternative.
3382     */
3383    public static boolean isSpaceChar(int codePoint) {
3384        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
3385        // SPACE or NO-BREAK SPACE?
3386        if (codePoint == 0x20 || codePoint == 0xa0) {
3387            return true;
3388        }
3389        if (codePoint < 0x1000) {
3390            return false;
3391        }
3392        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
3393        if (codePoint == 0x1680 || codePoint == 0x180e) {
3394            return true;
3395        }
3396        if (codePoint < 0x2000) {
3397            return false;
3398        }
3399        if (codePoint <= 0xffff) {
3400            // Other whitespace from General Punctuation...
3401            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
3402                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
3403        }
3404        // Let icu4c worry about non-BMP code points.
3405        return isSpaceCharImpl(codePoint);
3406    }
3407
3408    private static native boolean isSpaceCharImpl(int codePoint);
3409
3410    /**
3411     * Indicates whether the specified character is a titlecase character.
3412     *
3413     * @param c
3414     *            the character to check.
3415     * @return {@code true} if {@code c} is a titlecase character, {@code false}
3416     *         otherwise.
3417     */
3418    public static boolean isTitleCase(char c) {
3419        return isTitleCaseImpl(c);
3420    }
3421
3422    /**
3423     * Indicates whether the specified code point is a titlecase character.
3424     *
3425     * @param codePoint
3426     *            the code point to check.
3427     * @return {@code true} if {@code codePoint} is a titlecase character,
3428     *         {@code false} otherwise.
3429     */
3430    public static boolean isTitleCase(int codePoint) {
3431        return isTitleCaseImpl(codePoint);
3432    }
3433
3434    private static native boolean isTitleCaseImpl(int codePoint);
3435
3436    /**
3437     * Indicates whether the specified character is valid as part of a Unicode
3438     * identifier other than the first character.
3439     *
3440     * @param c
3441     *            the character to check.
3442     * @return {@code true} if {@code c} is valid as part of a Unicode
3443     *         identifier; {@code false} otherwise.
3444     */
3445    public static boolean isUnicodeIdentifierPart(char c) {
3446        return isUnicodeIdentifierPartImpl(c);
3447    }
3448
3449    /**
3450     * Indicates whether the specified code point is valid as part of a Unicode
3451     * identifier other than the first character.
3452     *
3453     * @param codePoint
3454     *            the code point to check.
3455     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3456     *         identifier; {@code false} otherwise.
3457     */
3458    public static boolean isUnicodeIdentifierPart(int codePoint) {
3459        return isUnicodeIdentifierPartImpl(codePoint);
3460    }
3461
3462    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3463
3464    /**
3465     * Indicates whether the specified character is a valid initial character
3466     * for a Unicode identifier.
3467     *
3468     * @param c
3469     *            the character to check.
3470     * @return {@code true} if {@code c} is a valid first character for a
3471     *         Unicode identifier; {@code false} otherwise.
3472     */
3473    public static boolean isUnicodeIdentifierStart(char c) {
3474        return isUnicodeIdentifierStartImpl(c);
3475    }
3476
3477    /**
3478     * Indicates whether the specified code point is a valid initial character
3479     * for a Unicode identifier.
3480     *
3481     * @param codePoint
3482     *            the code point to check.
3483     * @return {@code true} if {@code codePoint} is a valid first character for
3484     *         a Unicode identifier; {@code false} otherwise.
3485     */
3486    public static boolean isUnicodeIdentifierStart(int codePoint) {
3487        return isUnicodeIdentifierStartImpl(codePoint);
3488    }
3489
3490    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3491
3492    /**
3493     * Indicates whether the specified character is an upper case letter.
3494     *
3495     * @param c
3496     *            the character to check.
3497     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3498     *         otherwise.
3499     */
3500    public static boolean isUpperCase(char c) {
3501        return isUpperCase((int) c);
3502    }
3503
3504    /**
3505     * Indicates whether the specified code point is an upper case letter.
3506     *
3507     * @param codePoint
3508     *            the code point to check.
3509     * @return {@code true} if {@code codePoint} is a upper case letter;
3510     *         {@code false} otherwise.
3511     */
3512    public static boolean isUpperCase(int codePoint) {
3513        // Optimized case for ASCII
3514        if ('A' <= codePoint && codePoint <= 'Z') {
3515            return true;
3516        }
3517        if (codePoint < 128) {
3518            return false;
3519        }
3520        return isUpperCaseImpl(codePoint);
3521    }
3522
3523    private static native boolean isUpperCaseImpl(int codePoint);
3524
3525    /**
3526     * See {@link #isWhitespace(int)}.
3527     */
3528    public static boolean isWhitespace(char c) {
3529        return isWhitespace((int) c);
3530    }
3531
3532    /**
3533     * Returns true if the given code point is a Unicode whitespace character.
3534     * The exact set of characters considered as whitespace varies with Unicode version.
3535     * Note that non-breaking spaces are not considered whitespace.
3536     * Note also that line separators are considered whitespace; see {@link #isSpaceChar}
3537     * for an alternative.
3538     */
3539    public static boolean isWhitespace(int codePoint) {
3540        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
3541        // Any ASCII whitespace character?
3542        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
3543            return true;
3544        }
3545        if (codePoint < 0x1000) {
3546            return false;
3547        }
3548        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
3549        if (codePoint == 0x1680 || codePoint == 0x180e) {
3550            return true;
3551        }
3552        if (codePoint < 0x2000) {
3553            return false;
3554        }
3555        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
3556        if (codePoint == 0x2007 || codePoint == 0x202f) {
3557            return false;
3558        }
3559        if (codePoint <= 0xffff) {
3560            // Other whitespace from General Punctuation...
3561            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
3562                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
3563        }
3564        // Let icu4c worry about non-BMP code points.
3565        return isWhitespaceImpl(codePoint);
3566    }
3567
3568    private static native boolean isWhitespaceImpl(int codePoint);
3569
3570    /**
3571     * Reverses the order of the first and second byte in the specified
3572     * character.
3573     *
3574     * @param c
3575     *            the character to reverse.
3576     * @return the character with reordered bytes.
3577     */
3578    public static char reverseBytes(char c) {
3579        return (char)((c<<8) | (c>>8));
3580    }
3581
3582    /**
3583     * Returns the lower case equivalent for the specified character if the
3584     * character is an upper case letter. Otherwise, the specified character is
3585     * returned unchanged.
3586     *
3587     * @param c
3588     *            the character
3589     * @return if {@code c} is an upper case character then its lower case
3590     *         counterpart, otherwise just {@code c}.
3591     */
3592    public static char toLowerCase(char c) {
3593        return (char) toLowerCase((int) c);
3594    }
3595
3596    /**
3597     * Returns the lower case equivalent for the specified code point if it is
3598     * an upper case letter. Otherwise, the specified code point is returned
3599     * unchanged.
3600     *
3601     * @param codePoint
3602     *            the code point to check.
3603     * @return if {@code codePoint} is an upper case character then its lower
3604     *         case counterpart, otherwise just {@code codePoint}.
3605     */
3606    public static int toLowerCase(int codePoint) {
3607        // Optimized case for ASCII
3608        if ('A' <= codePoint && codePoint <= 'Z') {
3609            return (char) (codePoint + ('a' - 'A'));
3610        }
3611        if (codePoint < 192) {
3612            return codePoint;
3613        }
3614        return toLowerCaseImpl(codePoint);
3615    }
3616
3617    private static native int toLowerCaseImpl(int codePoint);
3618
3619    @Override
3620    public String toString() {
3621        return String.valueOf(value);
3622    }
3623
3624    /**
3625     * Converts the specified character to its string representation.
3626     *
3627     * @param value
3628     *            the character to convert.
3629     * @return the character converted to a string.
3630     */
3631    public static String toString(char value) {
3632        return String.valueOf(value);
3633    }
3634
3635    /**
3636     * Returns the title case equivalent for the specified character if it
3637     * exists. Otherwise, the specified character is returned unchanged.
3638     *
3639     * @param c
3640     *            the character to convert.
3641     * @return the title case equivalent of {@code c} if it exists, otherwise
3642     *         {@code c}.
3643     */
3644    public static char toTitleCase(char c) {
3645        return (char) toTitleCaseImpl(c);
3646    }
3647
3648    /**
3649     * Returns the title case equivalent for the specified code point if it
3650     * exists. Otherwise, the specified code point is returned unchanged.
3651     *
3652     * @param codePoint
3653     *            the code point to convert.
3654     * @return the title case equivalent of {@code codePoint} if it exists,
3655     *         otherwise {@code codePoint}.
3656     */
3657    public static int toTitleCase(int codePoint) {
3658        return toTitleCaseImpl(codePoint);
3659    }
3660
3661    private static native int toTitleCaseImpl(int codePoint);
3662
3663    /**
3664     * Returns the upper case equivalent for the specified character if the
3665     * character is a lower case letter. Otherwise, the specified character is
3666     * returned unchanged.
3667     *
3668     * @param c
3669     *            the character to convert.
3670     * @return if {@code c} is a lower case character then its upper case
3671     *         counterpart, otherwise just {@code c}.
3672     */
3673    public static char toUpperCase(char c) {
3674        return (char) toUpperCase((int) c);
3675    }
3676
3677    /**
3678     * Returns the upper case equivalent for the specified code point if the
3679     * code point is a lower case letter. Otherwise, the specified code point is
3680     * returned unchanged.
3681     *
3682     * @param codePoint
3683     *            the code point to convert.
3684     * @return if {@code codePoint} is a lower case character then its upper
3685     *         case counterpart, otherwise just {@code codePoint}.
3686     */
3687    public static int toUpperCase(int codePoint) {
3688        // Optimized case for ASCII
3689        if ('a' <= codePoint && codePoint <= 'z') {
3690            return (char) (codePoint - ('a' - 'A'));
3691        }
3692        if (codePoint < 181) {
3693            return codePoint;
3694        }
3695        return toUpperCaseImpl(codePoint);
3696    }
3697
3698    private static native int toUpperCaseImpl(int codePoint);
3699}
3700