Character.java revision 276c5cd70991e814f085bf417cb647dce9bb55e4
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21import java.util.Arrays;
22
23/**
24 * The wrapper for the primitive type {@code char}. This class also provides a
25 * number of utility methods for working with characters.
26 *
27 * <p>Character data is kept up to date as Unicode evolves.
28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29 * the {@code Locale} documentation for details of the Unicode versions implemented by current
30 * and historical Android releases.
31 *
32 * <p>The Unicode specification, character tables, and other information are available at
33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34 *
35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39 * encoding and {@code char} pairs are used to represent code points in the
40 * supplementary range. A pair of {@code char} values that represent a
41 * supplementary character are made up of a <i>high surrogate</i> with a value
42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
43 * 0xDC00 to 0xDFFF.
44 * <p>
45 * On the Java platform a {@code char} value represents either a single BMP code
46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47 * is used to represent all Unicode code points.
48 *
49 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51 * grouped semantically to provide a convenient overview. This table is also useful in
52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53 * <span class="datatable">
54 * <style type="text/css">
55 * .datatable td { padding-right: 20px; }
56 * </style>
57 * <p><table>
58 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63 * <tr> <td><br></td> </tr>
64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69 * <tr> <td><br></td> </tr>
70 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73 * <tr> <td><br></td> </tr>
74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77 * <tr> <td><br></td> </tr>
78 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85 * <tr> <td><br></td> </tr>
86 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90 * <tr> <td><br></td> </tr>
91 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94 * </table>
95 * </span>
96 *
97 * @since 1.0
98 */
99public final class Character implements Serializable, Comparable<Character> {
100    private static final long serialVersionUID = 3786198910865385080L;
101
102    private final char value;
103
104    /**
105     * The minimum {@code Character} value.
106     */
107    public static final char MIN_VALUE = '\u0000';
108
109    /**
110     * The maximum {@code Character} value.
111     */
112    public static final char MAX_VALUE = '\uffff';
113
114    /**
115     * The minimum radix used for conversions between characters and integers.
116     */
117    public static final int MIN_RADIX = 2;
118
119    /**
120     * The maximum radix used for conversions between characters and integers.
121     */
122    public static final int MAX_RADIX = 36;
123
124    /**
125     * The {@link Class} object that represents the primitive type {@code char}.
126     */
127    @SuppressWarnings("unchecked")
128    public static final Class<Character> TYPE
129            = (Class<Character>) char[].class.getComponentType();
130    // Note: Character.TYPE can't be set to "char.class", since *that* is
131    // defined to be "java.lang.Character.TYPE";
132
133    /**
134     * Unicode category constant Cn.
135     */
136    public static final byte UNASSIGNED = 0;
137
138    /**
139     * Unicode category constant Lu.
140     */
141    public static final byte UPPERCASE_LETTER = 1;
142
143    /**
144     * Unicode category constant Ll.
145     */
146    public static final byte LOWERCASE_LETTER = 2;
147
148    /**
149     * Unicode category constant Lt.
150     */
151    public static final byte TITLECASE_LETTER = 3;
152
153    /**
154     * Unicode category constant Lm.
155     */
156    public static final byte MODIFIER_LETTER = 4;
157
158    /**
159     * Unicode category constant Lo.
160     */
161    public static final byte OTHER_LETTER = 5;
162
163    /**
164     * Unicode category constant Mn.
165     */
166    public static final byte NON_SPACING_MARK = 6;
167
168    /**
169     * Unicode category constant Me.
170     */
171    public static final byte ENCLOSING_MARK = 7;
172
173    /**
174     * Unicode category constant Mc.
175     */
176    public static final byte COMBINING_SPACING_MARK = 8;
177
178    /**
179     * Unicode category constant Nd.
180     */
181    public static final byte DECIMAL_DIGIT_NUMBER = 9;
182
183    /**
184     * Unicode category constant Nl.
185     */
186    public static final byte LETTER_NUMBER = 10;
187
188    /**
189     * Unicode category constant No.
190     */
191    public static final byte OTHER_NUMBER = 11;
192
193    /**
194     * Unicode category constant Zs.
195     */
196    public static final byte SPACE_SEPARATOR = 12;
197
198    /**
199     * Unicode category constant Zl.
200     */
201    public static final byte LINE_SEPARATOR = 13;
202
203    /**
204     * Unicode category constant Zp.
205     */
206    public static final byte PARAGRAPH_SEPARATOR = 14;
207
208    /**
209     * Unicode category constant Cc.
210     */
211    public static final byte CONTROL = 15;
212
213    /**
214     * Unicode category constant Cf.
215     */
216    public static final byte FORMAT = 16;
217
218    /**
219     * Unicode category constant Co.
220     */
221    public static final byte PRIVATE_USE = 18;
222
223    /**
224     * Unicode category constant Cs.
225     */
226    public static final byte SURROGATE = 19;
227
228    /**
229     * Unicode category constant Pd.
230     */
231    public static final byte DASH_PUNCTUATION = 20;
232
233    /**
234     * Unicode category constant Ps.
235     */
236    public static final byte START_PUNCTUATION = 21;
237
238    /**
239     * Unicode category constant Pe.
240     */
241    public static final byte END_PUNCTUATION = 22;
242
243    /**
244     * Unicode category constant Pc.
245     */
246    public static final byte CONNECTOR_PUNCTUATION = 23;
247
248    /**
249     * Unicode category constant Po.
250     */
251    public static final byte OTHER_PUNCTUATION = 24;
252
253    /**
254     * Unicode category constant Sm.
255     */
256    public static final byte MATH_SYMBOL = 25;
257
258    /**
259     * Unicode category constant Sc.
260     */
261    public static final byte CURRENCY_SYMBOL = 26;
262
263    /**
264     * Unicode category constant Sk.
265     */
266    public static final byte MODIFIER_SYMBOL = 27;
267
268    /**
269     * Unicode category constant So.
270     */
271    public static final byte OTHER_SYMBOL = 28;
272
273    /**
274     * Unicode category constant Pi.
275     *
276     * @since 1.4
277     */
278    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
279
280    /**
281     * Unicode category constant Pf.
282     *
283     * @since 1.4
284     */
285    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
286
287    /**
288     * Unicode bidirectional constant.
289     *
290     * @since 1.4
291     */
292    public static final byte DIRECTIONALITY_UNDEFINED = -1;
293
294    /**
295     * Unicode bidirectional constant L.
296     *
297     * @since 1.4
298     */
299    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
300
301    /**
302     * Unicode bidirectional constant R.
303     *
304     * @since 1.4
305     */
306    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
307
308    /**
309     * Unicode bidirectional constant AL.
310     *
311     * @since 1.4
312     */
313    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
314
315    /**
316     * Unicode bidirectional constant EN.
317     *
318     * @since 1.4
319     */
320    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
321
322    /**
323     * Unicode bidirectional constant ES.
324     *
325     * @since 1.4
326     */
327    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
328
329    /**
330     * Unicode bidirectional constant ET.
331     *
332     * @since 1.4
333     */
334    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
335
336    /**
337     * Unicode bidirectional constant AN.
338     *
339     * @since 1.4
340     */
341    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
342
343    /**
344     * Unicode bidirectional constant CS.
345     *
346     * @since 1.4
347     */
348    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
349
350    /**
351     * Unicode bidirectional constant NSM.
352     *
353     * @since 1.4
354     */
355    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
356
357    /**
358     * Unicode bidirectional constant BN.
359     *
360     * @since 1.4
361     */
362    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
363
364    /**
365     * Unicode bidirectional constant B.
366     *
367     * @since 1.4
368     */
369    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
370
371    /**
372     * Unicode bidirectional constant S.
373     *
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
377
378    /**
379     * Unicode bidirectional constant WS.
380     *
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_WHITESPACE = 12;
384
385    /**
386     * Unicode bidirectional constant ON.
387     *
388     * @since 1.4
389     */
390    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
391
392    /**
393     * Unicode bidirectional constant LRE.
394     *
395     * @since 1.4
396     */
397    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
398
399    /**
400     * Unicode bidirectional constant LRO.
401     *
402     * @since 1.4
403     */
404    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
405
406    /**
407     * Unicode bidirectional constant RLE.
408     *
409     * @since 1.4
410     */
411    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
412
413    /**
414     * Unicode bidirectional constant RLO.
415     *
416     * @since 1.4
417     */
418    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
419
420    /**
421     * Unicode bidirectional constant PDF.
422     *
423     * @since 1.4
424     */
425    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
426
427    /**
428     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
429     * encoding, {@code '\uD800'}.
430     *
431     * @since 1.5
432     */
433    public static final char MIN_HIGH_SURROGATE = '\uD800';
434
435    /**
436     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
437     * encoding, {@code '\uDBFF'}.
438     *
439     * @since 1.5
440     */
441    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
442
443    /**
444     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
445     * encoding, {@code '\uDC00'}.
446     *
447     * @since 1.5
448     */
449    public static final char MIN_LOW_SURROGATE = '\uDC00';
450
451    /**
452     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
453     * encoding, {@code '\uDFFF'}.
454     *
455     * @since 1.5
456     */
457    public static final char MAX_LOW_SURROGATE = '\uDFFF';
458
459    /**
460     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
461     *
462     * @since 1.5
463     */
464    public static final char MIN_SURROGATE = '\uD800';
465
466    /**
467     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
468     *
469     * @since 1.5
470     */
471    public static final char MAX_SURROGATE = '\uDFFF';
472
473    /**
474     * The minimum value of a supplementary code point, {@code U+010000}.
475     *
476     * @since 1.5
477     */
478    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
479
480    /**
481     * The minimum code point value, {@code U+0000}.
482     *
483     * @since 1.5
484     */
485    public static final int MIN_CODE_POINT = 0x000000;
486
487    /**
488     * The maximum code point value, {@code U+10FFFF}.
489     *
490     * @since 1.5
491     */
492    public static final int MAX_CODE_POINT = 0x10FFFF;
493
494    /**
495     * The number of bits required to represent a {@code Character} value in
496     * unsigned binary form.
497     *
498     * @since 1.5
499     */
500    public static final int SIZE = 16;
501
502    private static final byte[] DIRECTIONALITY = new byte[] {
503            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
504            DIRECTIONALITY_EUROPEAN_NUMBER,
505            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
506            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
507            DIRECTIONALITY_ARABIC_NUMBER,
508            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
509            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
510            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
511            DIRECTIONALITY_OTHER_NEUTRALS,
512            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
513            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
514            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
515            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
516            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
517            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
518            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
519
520    /*
521     * Represents a subset of the Unicode character set.
522     */
523    public static class Subset {
524        String name;
525
526        /**
527         * Constructs a new {@code Subset}.
528         *
529         * @param string
530         *            this subset's name.
531         */
532        protected Subset(String string) {
533            if (string == null) {
534                throw new NullPointerException();
535            }
536            name = string;
537        }
538
539        /**
540         * Compares this character subset with the specified object. Uses
541         * {@link java.lang.Object#equals(Object)} to do the comparison.
542         *
543         * @param object
544         *            the object to compare this character subset with.
545         * @return {@code true} if {@code object} is this subset, that is, if
546         *         {@code object == this}; {@code false} otherwise.
547         */
548        @Override
549        public final boolean equals(Object object) {
550            return super.equals(object);
551        }
552
553        /**
554         * Returns the integer hash code for this character subset.
555         *
556         * @return this subset's hash code, which is the hash code computed by
557         *         {@link java.lang.Object#hashCode()}.
558         */
559        @Override
560        public final int hashCode() {
561            return super.hashCode();
562        }
563
564        /**
565         * Returns the string representation of this subset.
566         *
567         * @return this subset's name.
568         */
569        @Override
570        public final String toString() {
571            return name;
572        }
573    }
574
575    /**
576     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
577     * specification.
578     *
579     * @since 1.2
580     */
581    public static final class UnicodeBlock extends Subset {
582        /**
583         * The &quot;Surrogates Area&quot; Unicode Block.
584         *
585         * @deprecated As of Java 5, this block has been replaced by
586         *             {@link #HIGH_SURROGATES},
587         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
588         *             {@link #LOW_SURROGATES}.
589         */
590        @Deprecated
591        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
592        /**
593         * The &quot;Basic Latin&quot; Unicode Block.
594         *
595         * @since 1.2
596         */
597        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
598        /**
599         * The &quot;Latin-1 Supplement&quot; Unicode Block.
600         *
601         * @since 1.2
602         */
603        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
604        /**
605         * The &quot;Latin Extended-A&quot; Unicode Block.
606         *
607         * @since 1.2
608         */
609        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
610        /**
611         * The &quot;Latin Extended-B&quot; Unicode Block.
612         *
613         * @since 1.2
614         */
615        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
616        /**
617         * The &quot;IPA Extensions&quot; Unicode Block.
618         *
619         * @since 1.2
620         */
621        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
622        /**
623         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
624         *
625         * @since 1.2
626         */
627        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
628        /**
629         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
630         *
631         * @since 1.2
632         */
633        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
634        /**
635         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
636         * to as &quot;Greek&quot;.
637         *
638         * @since 1.2
639         */
640        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
641        /**
642         * The &quot;Cyrillic&quot; Unicode Block.
643         *
644         * @since 1.2
645         */
646        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
647        /**
648         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
649         * referred to as &quot;Cyrillic Supplementary&quot;.
650         *
651         * @since 1.5
652         */
653        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
654        /**
655         * The &quot;Armenian&quot; Unicode Block.
656         *
657         * @since 1.2
658         */
659        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
660        /**
661         * The &quot;Hebrew&quot; Unicode Block.
662         *
663         * @since 1.2
664         */
665        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
666        /**
667         * The &quot;Arabic&quot; Unicode Block.
668         *
669         * @since 1.2
670         */
671        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
672        /**
673         * The &quot;Syriac&quot; Unicode Block.
674         *
675         * @since 1.4
676         */
677        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
678        /**
679         * The &quot;Thaana&quot; Unicode Block.
680         *
681         * @since 1.4
682         */
683        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
684        /**
685         * The &quot;Devanagari&quot; Unicode Block.
686         *
687         * @since 1.2
688         */
689        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
690        /**
691         * The &quot;Bengali&quot; Unicode Block.
692         *
693         * @since 1.2
694         */
695        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
696        /**
697         * The &quot;Gurmukhi&quot; Unicode Block.
698         *
699         * @since 1.2
700         */
701        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
702        /**
703         * The &quot;Gujarati&quot; Unicode Block.
704         *
705         * @since 1.2
706         */
707        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
708        /**
709         * The &quot;Oriya&quot; Unicode Block.
710         *
711         * @since 1.2
712         */
713        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
714        /**
715         * The &quot;Tamil&quot; Unicode Block.
716         *
717         * @since 1.2
718         */
719        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
720        /**
721         * The &quot;Telugu&quot; Unicode Block.
722         *
723         * @since 1.2
724         */
725        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
726        /**
727         * The &quot;Kannada&quot; Unicode Block.
728         *
729         * @since 1.2
730         */
731        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
732        /**
733         * The &quot;Malayalam&quot; Unicode Block.
734         *
735         * @since 1.2
736         */
737        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
738        /**
739         * The &quot;Sinhala&quot; Unicode Block.
740         *
741         * @since 1.4
742         */
743        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
744        /**
745         * The &quot;Thai&quot; Unicode Block.
746         *
747         * @since 1.2
748         */
749        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
750        /**
751         * The &quot;Lao&quot; Unicode Block.
752         *
753         * @since 1.2
754         */
755        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
756        /**
757         * The &quot;Tibetan&quot; Unicode Block.
758         *
759         * @since 1.2
760         */
761        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
762        /**
763         * The &quot;Myanmar&quot; Unicode Block.
764         *
765         * @since 1.4
766         */
767        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
768        /**
769         * The &quot;Georgian&quot; Unicode Block.
770         *
771         * @since 1.2
772         */
773        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
774        /**
775         * The &quot;Hangul Jamo&quot; Unicode Block.
776         *
777         * @since 1.2
778         */
779        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
780        /**
781         * The &quot;Ethiopic&quot; Unicode Block.
782         *
783         * @since 1.4
784         */
785        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
786        /**
787         * The &quot;Cherokee&quot; Unicode Block.
788         *
789         * @since 1.4
790         */
791        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
792        /**
793         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
794         *
795         * @since 1.4
796         */
797        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
798        /**
799         * The &quot;Ogham&quot; Unicode Block.
800         *
801         * @since 1.4
802         */
803        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
804        /**
805         * The &quot;Runic&quot; Unicode Block.
806         *
807         * @since 1.4
808         */
809        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
810        /**
811         * The &quot;Tagalog&quot; Unicode Block.
812         *
813         * @since 1.5
814         */
815        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
816        /**
817         * The &quot;Hanunoo&quot; Unicode Block.
818         *
819         * @since 1.5
820         */
821        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
822        /**
823         * The &quot;Buhid&quot; Unicode Block.
824         *
825         * @since 1.5
826         */
827        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
828        /**
829         * The &quot;Tagbanwa&quot; Unicode Block.
830         *
831         * @since 1.5
832         */
833        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
834        /**
835         * The &quot;Khmer&quot; Unicode Block.
836         *
837         * @since 1.4
838         */
839        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
840        /**
841         * The &quot;Mongolian&quot; Unicode Block.
842         *
843         * @since 1.4
844         */
845        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
846        /**
847         * The &quot;Limbu&quot; Unicode Block.
848         *
849         * @since 1.5
850         */
851        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
852        /**
853         * The &quot;Tai Le&quot; Unicode Block.
854         *
855         * @since 1.5
856         */
857        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
        /**
         * The &quot;Khmer Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
        /**
         * The &quot;Phonetic Extensions&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
        /**
         * The &quot;Latin Extended Additional&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
        /**
         * The &quot;Greek Extended&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
        /**
         * The &quot;General Punctuation&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
        /**
         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
        /**
         * The &quot;Currency Symbols&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
        /**
         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
         * Block. Previously referred to as &quot;Combining Marks for
         * Symbols&quot;.
         *
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
        /**
         * The &quot;Letterlike Symbols&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
        /**
         * The &quot;Number Forms&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
        /**
         * The &quot;Arrows&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
        /**
         * The &quot;Mathematical Operators&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
        /**
         * The &quot;Miscellaneous Technical&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
        /**
         * The &quot;Control Pictures&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
        /**
         * The &quot;Optical Character Recognition&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
        /**
         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
        /**
         * The &quot;Box Drawing&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
        /**
         * The &quot;Block Elements&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
        /**
         * The &quot;Geometric Shapes&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
        /**
         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
        /**
         * The &quot;Dingbats&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
        /**
         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
        /**
         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
        /**
         * The &quot;Braille Patterns&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
        /**
         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
        /**
         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
        /**
         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
        /**
         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
        /**
         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
        /**
         * The &quot;Kangxi Radicals&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
        /**
         * The &quot;Ideographic Description Characters&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
        /**
         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
        /**
         * The &quot;Hiragana&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
        /**
         * The &quot;Katakana&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
        /**
         * The &quot;Bopomofo&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
        /**
         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
        /**
         * The &quot;Kanbun&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
        /**
         * The &quot;Bopomofo Extended&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
        /**
         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
        /**
         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
        /**
         * The &quot;CJK Compatibility&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
        /**
         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
        /**
         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
        /**
         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
        /**
         * The &quot;Yi Syllables&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
        /**
         * The &quot;Yi Radicals&quot; Unicode Block.
         *
         * @since 1.4
         */
        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
        /**
         * The &quot;Hangul Syllables&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
        /**
         * The &quot;High Surrogates&quot; Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         *
         * @since 1.5
         */
        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
        /**
         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         *
         * @since 1.5
         */
        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
        /**
         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         *
         * @since 1.5
         */
        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
        /**
         * The &quot;Private Use Area&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
        /**
         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
        /**
         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
        /**
         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
        /**
         * The &quot;Variation Selectors&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
        /**
         * The &quot;Combining Half Marks&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
        /**
         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
        /**
         * The &quot;Small Form Variants&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
        /**
         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
        /**
         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
        /**
         * The &quot;Specials&quot; Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
        /**
         * The &quot;Linear B Syllabary&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
        /**
         * The &quot;Linear B Ideograms&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
        /**
         * The &quot;Aegean Numbers&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
        /**
         * The &quot;Old Italic&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
        /**
         * The &quot;Gothic&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
        /**
         * The &quot;Ugaritic&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
        /**
         * The &quot;Deseret&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
        /**
         * The &quot;Shavian&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
        /**
         * The &quot;Osmanya&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
        /**
         * The &quot;Cypriot Syllabary&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
        /**
         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
        /**
         * The &quot;Musical Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
        /**
         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
        /**
         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
        /**
         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
        /**
         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
        /**
         * The &quot;Tags&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
        /**
         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
        /**
         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
        /**
         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1344
1345        /*
1346         * All of the UnicodeBlocks with valid ranges in ascending order.
1347         */
1348        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1349            null,
1350            UnicodeBlock.BASIC_LATIN,
1351            UnicodeBlock.LATIN_1_SUPPLEMENT,
1352            UnicodeBlock.LATIN_EXTENDED_A,
1353            UnicodeBlock.LATIN_EXTENDED_B,
1354            UnicodeBlock.IPA_EXTENSIONS,
1355            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1356            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1357            UnicodeBlock.GREEK,
1358            UnicodeBlock.CYRILLIC,
1359            UnicodeBlock.ARMENIAN,
1360            UnicodeBlock.HEBREW,
1361            UnicodeBlock.ARABIC,
1362            UnicodeBlock.SYRIAC,
1363            UnicodeBlock.THAANA,
1364            UnicodeBlock.DEVANAGARI,
1365            UnicodeBlock.BENGALI,
1366            UnicodeBlock.GURMUKHI,
1367            UnicodeBlock.GUJARATI,
1368            UnicodeBlock.ORIYA,
1369            UnicodeBlock.TAMIL,
1370            UnicodeBlock.TELUGU,
1371            UnicodeBlock.KANNADA,
1372            UnicodeBlock.MALAYALAM,
1373            UnicodeBlock.SINHALA,
1374            UnicodeBlock.THAI,
1375            UnicodeBlock.LAO,
1376            UnicodeBlock.TIBETAN,
1377            UnicodeBlock.MYANMAR,
1378            UnicodeBlock.GEORGIAN,
1379            UnicodeBlock.HANGUL_JAMO,
1380            UnicodeBlock.ETHIOPIC,
1381            UnicodeBlock.CHEROKEE,
1382            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1383            UnicodeBlock.OGHAM,
1384            UnicodeBlock.RUNIC,
1385            UnicodeBlock.KHMER,
1386            UnicodeBlock.MONGOLIAN,
1387            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1388            UnicodeBlock.GREEK_EXTENDED,
1389            UnicodeBlock.GENERAL_PUNCTUATION,
1390            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1391            UnicodeBlock.CURRENCY_SYMBOLS,
1392            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1393            UnicodeBlock.LETTERLIKE_SYMBOLS,
1394            UnicodeBlock.NUMBER_FORMS,
1395            UnicodeBlock.ARROWS,
1396            UnicodeBlock.MATHEMATICAL_OPERATORS,
1397            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1398            UnicodeBlock.CONTROL_PICTURES,
1399            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1400            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1401            UnicodeBlock.BOX_DRAWING,
1402            UnicodeBlock.BLOCK_ELEMENTS,
1403            UnicodeBlock.GEOMETRIC_SHAPES,
1404            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1405            UnicodeBlock.DINGBATS,
1406            UnicodeBlock.BRAILLE_PATTERNS,
1407            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1408            UnicodeBlock.KANGXI_RADICALS,
1409            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1410            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1411            UnicodeBlock.HIRAGANA,
1412            UnicodeBlock.KATAKANA,
1413            UnicodeBlock.BOPOMOFO,
1414            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1415            UnicodeBlock.KANBUN,
1416            UnicodeBlock.BOPOMOFO_EXTENDED,
1417            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1418            UnicodeBlock.CJK_COMPATIBILITY,
1419            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1420            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1421            UnicodeBlock.YI_SYLLABLES,
1422            UnicodeBlock.YI_RADICALS,
1423            UnicodeBlock.HANGUL_SYLLABLES,
1424            UnicodeBlock.HIGH_SURROGATES,
1425            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1426            UnicodeBlock.LOW_SURROGATES,
1427            UnicodeBlock.PRIVATE_USE_AREA,
1428            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1429            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1430            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1431            UnicodeBlock.COMBINING_HALF_MARKS,
1432            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1433            UnicodeBlock.SMALL_FORM_VARIANTS,
1434            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1435            UnicodeBlock.SPECIALS,
1436            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1437            UnicodeBlock.OLD_ITALIC,
1438            UnicodeBlock.GOTHIC,
1439            UnicodeBlock.DESERET,
1440            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1441            UnicodeBlock.MUSICAL_SYMBOLS,
1442            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1443            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1444            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1445            UnicodeBlock.TAGS,
1446            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1447            UnicodeBlock.TAGALOG,
1448            UnicodeBlock.HANUNOO,
1449            UnicodeBlock.BUHID,
1450            UnicodeBlock.TAGBANWA,
1451            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1452            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1453            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1454            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1455            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1456            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1457            UnicodeBlock.VARIATION_SELECTORS,
1458            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1459            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1460            UnicodeBlock.LIMBU,
1461            UnicodeBlock.TAI_LE,
1462            UnicodeBlock.KHMER_SYMBOLS,
1463            UnicodeBlock.PHONETIC_EXTENSIONS,
1464            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1465            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1466            UnicodeBlock.LINEAR_B_SYLLABARY,
1467            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1468            UnicodeBlock.AEGEAN_NUMBERS,
1469            UnicodeBlock.UGARITIC,
1470            UnicodeBlock.SHAVIAN,
1471            UnicodeBlock.OSMANYA,
1472            UnicodeBlock.CYPRIOT_SYLLABARY,
1473            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1474            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
1475        };
1476
1477        /**
1478         * Retrieves the constant that corresponds to the specified block name.
1479         * The block names are defined by the Unicode 4.0.1 specification in the
1480         * {@code Blocks-4.0.1.txt} file.
1481         * <p>
1482         * Block names may be one of the following:
1483         * <ul>
1484         * <li>Canonical block name, as defined by the Unicode specification;
1485         * case-insensitive.</li>
1486         * <li>Canonical block name without any spaces, as defined by the
1487         * Unicode specification; case-insensitive.</li>
1488         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1489         * uppercasing the canonical name and replacing all spaces and hyphens
1490         * with underscores.</li>
1491         * </ul>
1492         *
1493         * @param blockName
1494         *            the name of the block to retrieve.
1495         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1496         * @throws NullPointerException
1497         *             if {@code blockName} is {@code null}.
1498         * @throws IllegalArgumentException
1499         *             if {@code blockName} is not a valid block name.
1500         * @since 1.5
1501         */
1502        public static UnicodeBlock forName(String blockName) {
1503            if (blockName == null) {
1504                throw new NullPointerException();
1505            }
1506            int block = forNameImpl(blockName);
1507            if (block == -1) {
1508                if (blockName.equals("SURROGATES_AREA")) {
1509                    return SURROGATES_AREA;
1510                } else if(blockName.equalsIgnoreCase("greek")) {
1511                    return GREEK;
1512                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1513                        blockName.equals("Combining Marks for Symbols") ||
1514                        blockName.equals("CombiningMarksforSymbols")) {
1515                    return COMBINING_MARKS_FOR_SYMBOLS;
1516                }
1517                throw new IllegalArgumentException("Bad block name: " + blockName);
1518            }
1519            return BLOCKS[block];
1520        }
1521
1522        /**
1523         * Gets the constant for the Unicode block that contains the specified
1524         * character.
1525         *
1526         * @param c
1527         *            the character for which to get the {@code UnicodeBlock}
1528         *            constant.
1529         * @return the {@code UnicodeBlock} constant for the block that contains
1530         *         {@code c}, or {@code null} if {@code c} does not belong to
1531         *         any defined block.
1532         */
1533        public static UnicodeBlock of(char c) {
1534            return of((int) c);
1535        }
1536
1537        /**
1538         * Gets the constant for the Unicode block that contains the specified
1539         * Unicode code point.
1540         *
1541         * @param codePoint
1542         *            the Unicode code point for which to get the
1543         *            {@code UnicodeBlock} constant.
1544         * @return the {@code UnicodeBlock} constant for the block that contains
1545         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1546         *         not belong to any defined block.
1547         * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
1548         * @since 1.5
1549         */
1550        public static UnicodeBlock of(int codePoint) {
1551            checkValidCodePoint(codePoint);
1552            int block = ofImpl(codePoint);
1553            if (block == -1 || block >= BLOCKS.length) {
1554                return null;
1555            }
1556            return BLOCKS[block];
1557        }
1558
        /**
         * Creates a block constant identified by {@code blockName}.
         *
         * @param blockName
         *            the identifier passed on to the superclass constructor.
         * @param start
         *            lower bound written at the declaration site (presumably
         *            the block's first code point); not stored or used here.
         * @param end
         *            upper bound written at the declaration site (presumably
         *            the block's last code point); not stored or used here.
         */
        private UnicodeBlock(String blockName, int start, int end) {
            // Only the name is kept: start and end are ignored, and block
            // membership is resolved by the native ofImpl lookup instead.
            super(blockName);
        }
1562    }
1563
    /**
     * Native lookup of a Unicode block by name.
     * <p>
     * {@code UnicodeBlock.forName} treats a result of {@code -1} as "name not
     * recognized" and any other result as an index into its block table.
     * NOTE(review): the id numbering is defined by the native implementation,
     * which is not visible from this file.
     */
    private static native int forNameImpl(String blockName);
1565
    /**
     * Native lookup of the Unicode block that contains a code point.
     * <p>
     * {@code UnicodeBlock.of(int)} validates the code point before calling
     * this method, treats a result of {@code -1} as "no block", and uses any
     * other result as an index into its block table (ignoring indexes past
     * the end of that table).
     */
    private static native int ofImpl(int codePoint);
1567
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value.
     * <p>
     * Every call creates a distinct object; {@link #valueOf(char)} can hand
     * out a cached instance instead and is usually the better choice.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
1578
1579    /**
1580     * Gets the primitive value of this character.
1581     *
1582     * @return this object's primitive value.
1583     */
1584    public char charValue() {
1585        return value;
1586    }
1587
1588    private static void checkValidCodePoint(int codePoint) {
1589        if (!isValidCodePoint(codePoint)) {
1590            throw new IllegalArgumentException("Invalid code point: " + codePoint);
1591        }
1592    }
1593
1594    /**
1595     * Compares this object to the specified character object to determine their
1596     * relative order.
1597     *
1598     * @param c
1599     *            the character object to compare this object to.
1600     * @return {@code 0} if the value of this character and the value of
1601     *         {@code c} are equal; a positive value if the value of this
1602     *         character is greater than the value of {@code c}; a negative
1603     *         value if the value of this character is less than the value of
1604     *         {@code c}.
1605     * @see java.lang.Comparable
1606     * @since 1.2
1607     */
1608    public int compareTo(Character c) {
1609        return compare(value, c.value);
1610    }
1611
1612    /**
1613     * Compares two {@code char} values.
1614     * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
1615     * @since 1.7
1616     * @hide 1.7
1617     */
1618    public static int compare(char lhs, char rhs) {
1619        return lhs - rhs;
1620    }
1621
1622    /**
1623     * Returns a {@code Character} instance for the {@code char} value passed.
1624     * <p>
1625     * If it is not necessary to get a new {@code Character} instance, it is
1626     * recommended to use this method instead of the constructor, since it
1627     * maintains a cache of instances which may result in better performance.
1628     *
1629     * @param c
1630     *            the char value for which to get a {@code Character} instance.
1631     * @return the {@code Character} instance for {@code c}.
1632     * @since 1.5
1633     */
1634    public static Character valueOf(char c) {
1635        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1636    }
1637
1638    /**
1639     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1640     */
1641    private static final Character[] SMALL_VALUES = new Character[128];
1642
1643    static {
1644        for(int i = 0; i < 128; i++) {
1645            SMALL_VALUES[i] = new Character((char) i);
1646        }
1647    }
1648    /**
1649     * Indicates whether {@code codePoint} is a valid Unicode code point.
1650     *
1651     * @param codePoint
1652     *            the code point to test.
1653     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1654     *         {@code false} otherwise.
1655     * @since 1.5
1656     */
1657    public static boolean isValidCodePoint(int codePoint) {
1658        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1659    }
1660
1661    /**
1662     * Indicates whether {@code codePoint} is within the supplementary code
1663     * point range.
1664     *
1665     * @param codePoint
1666     *            the code point to test.
1667     * @return {@code true} if {@code codePoint} is within the supplementary
1668     *         code point range; {@code false} otherwise.
1669     * @since 1.5
1670     */
1671    public static boolean isSupplementaryCodePoint(int codePoint) {
1672        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1673    }
1674
1675    /**
1676     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1677     * that is used for representing supplementary characters in UTF-16
1678     * encoding.
1679     *
1680     * @param ch
1681     *            the character to test.
1682     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1683     *         {@code false} otherwise.
1684     * @see #isLowSurrogate(char)
1685     * @since 1.5
1686     */
1687    public static boolean isHighSurrogate(char ch) {
1688        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1689    }
1690
1691    /**
1692     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1693     * that is used for representing supplementary characters in UTF-16
1694     * encoding.
1695     *
1696     * @param ch
1697     *            the character to test.
1698     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1699     *         {@code false} otherwise.
1700     * @see #isHighSurrogate(char)
1701     * @since 1.5
1702     */
1703    public static boolean isLowSurrogate(char ch) {
1704        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1705    }
1706
1707    /**
1708     * Tests whether the given character is a high or low surrogate.
1709     * @since 1.7
1710     * @hide 1.7
1711     */
1712    public static boolean isSurrogate(char ch) {
1713        return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
1714    }
1715
1716    /**
1717     * Indicates whether the specified character pair is a valid surrogate pair.
1718     *
1719     * @param high
1720     *            the high surrogate unit to test.
1721     * @param low
1722     *            the low surrogate unit to test.
1723     * @return {@code true} if {@code high} is a high-surrogate code unit and
1724     *         {@code low} is a low-surrogate code unit; {@code false}
1725     *         otherwise.
1726     * @see #isHighSurrogate(char)
1727     * @see #isLowSurrogate(char)
1728     * @since 1.5
1729     */
1730    public static boolean isSurrogatePair(char high, char low) {
1731        return (isHighSurrogate(high) && isLowSurrogate(low));
1732    }
1733
1734    /**
1735     * Calculates the number of {@code char} values required to represent the
1736     * specified Unicode code point. This method checks if the {@code codePoint}
1737     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1738     * returned, otherwise {@code 1}. To test if the code point is valid, use
1739     * the {@link #isValidCodePoint(int)} method.
1740     *
1741     * @param codePoint
1742     *            the code point for which to calculate the number of required
1743     *            chars.
1744     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1745     * @see #isValidCodePoint(int)
1746     * @see #isSupplementaryCodePoint(int)
1747     * @since 1.5
1748     */
1749    public static int charCount(int codePoint) {
1750        return (codePoint >= 0x10000 ? 2 : 1);
1751    }
1752
1753    /**
1754     * Converts a surrogate pair into a Unicode code point. This method assumes
1755     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1756     * surrogates, then the result is indeterminate. The
1757     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1758     * method to validate the pair.
1759     *
1760     * @param high
1761     *            the high surrogate unit.
1762     * @param low
1763     *            the low surrogate unit.
1764     * @return the Unicode code point corresponding to the surrogate unit pair.
1765     * @see #isSurrogatePair(char, char)
1766     * @since 1.5
1767     */
1768    public static int toCodePoint(char high, char low) {
1769        // See RFC 2781, Section 2.2
1770        // http://www.ietf.org/rfc/rfc2781.txt
1771        int h = (high & 0x3FF) << 10;
1772        int l = low & 0x3FF;
1773        return (h | l) + 0x10000;
1774    }
1775
1776    /**
1777     * Returns the code point at {@code index} in the specified sequence of
1778     * character units. If the unit at {@code index} is a high-surrogate unit,
1779     * {@code index + 1} is less than the length of the sequence and the unit at
1780     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1781     * point represented by the pair is returned; otherwise the {@code char}
1782     * value at {@code index} is returned.
1783     *
1784     * @param seq
1785     *            the source sequence of {@code char} units.
1786     * @param index
1787     *            the position in {@code seq} from which to retrieve the code
1788     *            point.
1789     * @return the Unicode code point or {@code char} value at {@code index} in
1790     *         {@code seq}.
1791     * @throws NullPointerException
1792     *             if {@code seq} is {@code null}.
1793     * @throws IndexOutOfBoundsException
1794     *             if the {@code index} is negative or greater than or equal to
1795     *             the length of {@code seq}.
1796     * @since 1.5
1797     */
1798    public static int codePointAt(CharSequence seq, int index) {
1799        if (seq == null) {
1800            throw new NullPointerException();
1801        }
1802        int len = seq.length();
1803        if (index < 0 || index >= len) {
1804            throw new IndexOutOfBoundsException();
1805        }
1806
1807        char high = seq.charAt(index++);
1808        if (index >= len) {
1809            return high;
1810        }
1811        char low = seq.charAt(index);
1812        if (isSurrogatePair(high, low)) {
1813            return toCodePoint(high, low);
1814        }
1815        return high;
1816    }
1817
1818    /**
1819     * Returns the code point at {@code index} in the specified array of
1820     * character units. If the unit at {@code index} is a high-surrogate unit,
1821     * {@code index + 1} is less than the length of the array and the unit at
1822     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1823     * point represented by the pair is returned; otherwise the {@code char}
1824     * value at {@code index} is returned.
1825     *
1826     * @param seq
1827     *            the source array of {@code char} units.
1828     * @param index
1829     *            the position in {@code seq} from which to retrieve the code
1830     *            point.
1831     * @return the Unicode code point or {@code char} value at {@code index} in
1832     *         {@code seq}.
1833     * @throws NullPointerException
1834     *             if {@code seq} is {@code null}.
1835     * @throws IndexOutOfBoundsException
1836     *             if the {@code index} is negative or greater than or equal to
1837     *             the length of {@code seq}.
1838     * @since 1.5
1839     */
1840    public static int codePointAt(char[] seq, int index) {
1841        if (seq == null) {
1842            throw new NullPointerException();
1843        }
1844        int len = seq.length;
1845        if (index < 0 || index >= len) {
1846            throw new IndexOutOfBoundsException();
1847        }
1848
1849        char high = seq[index++];
1850        if (index >= len) {
1851            return high;
1852        }
1853        char low = seq[index];
1854        if (isSurrogatePair(high, low)) {
1855            return toCodePoint(high, low);
1856        }
1857        return high;
1858    }
1859
1860    /**
1861     * Returns the code point at {@code index} in the specified array of
1862     * character units, where {@code index} has to be less than {@code limit}.
1863     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1864     * is less than {@code limit} and the unit at {@code index + 1} is a
1865     * low-surrogate unit, then the supplementary code point represented by the
1866     * pair is returned; otherwise the {@code char} value at {@code index} is
1867     * returned.
1868     *
1869     * @param seq
1870     *            the source array of {@code char} units.
1871     * @param index
1872     *            the position in {@code seq} from which to get the code point.
1873     * @param limit
1874     *            the index after the last unit in {@code seq} that can be used.
1875     * @return the Unicode code point or {@code char} value at {@code index} in
1876     *         {@code seq}.
1877     * @throws NullPointerException
1878     *             if {@code seq} is {@code null}.
1879     * @throws IndexOutOfBoundsException
1880     *             if {@code index < 0}, {@code index >= limit},
1881     *             {@code limit < 0} or if {@code limit} is greater than the
1882     *             length of {@code seq}.
1883     * @since 1.5
1884     */
1885    public static int codePointAt(char[] seq, int index, int limit) {
1886        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1887            throw new IndexOutOfBoundsException();
1888        }
1889
1890        char high = seq[index++];
1891        if (index >= limit) {
1892            return high;
1893        }
1894        char low = seq[index];
1895        if (isSurrogatePair(high, low)) {
1896            return toCodePoint(high, low);
1897        }
1898        return high;
1899    }
1900
1901    /**
1902     * Returns the code point that precedes {@code index} in the specified
1903     * sequence of character units. If the unit at {@code index - 1} is a
1904     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1905     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1906     * point represented by the pair is returned; otherwise the {@code char}
1907     * value at {@code index - 1} is returned.
1908     *
1909     * @param seq
1910     *            the source sequence of {@code char} units.
1911     * @param index
1912     *            the position in {@code seq} following the code
1913     *            point that should be returned.
1914     * @return the Unicode code point or {@code char} value before {@code index}
1915     *         in {@code seq}.
1916     * @throws NullPointerException
1917     *             if {@code seq} is {@code null}.
1918     * @throws IndexOutOfBoundsException
1919     *             if the {@code index} is less than 1 or greater than the
1920     *             length of {@code seq}.
1921     * @since 1.5
1922     */
1923    public static int codePointBefore(CharSequence seq, int index) {
1924        if (seq == null) {
1925            throw new NullPointerException();
1926        }
1927        int len = seq.length();
1928        if (index < 1 || index > len) {
1929            throw new IndexOutOfBoundsException();
1930        }
1931
1932        char low = seq.charAt(--index);
1933        if (--index < 0) {
1934            return low;
1935        }
1936        char high = seq.charAt(index);
1937        if (isSurrogatePair(high, low)) {
1938            return toCodePoint(high, low);
1939        }
1940        return low;
1941    }
1942
1943    /**
1944     * Returns the code point that precedes {@code index} in the specified
1945     * array of character units. If the unit at {@code index - 1} is a
1946     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1947     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1948     * point represented by the pair is returned; otherwise the {@code char}
1949     * value at {@code index - 1} is returned.
1950     *
1951     * @param seq
1952     *            the source array of {@code char} units.
1953     * @param index
1954     *            the position in {@code seq} following the code
1955     *            point that should be returned.
1956     * @return the Unicode code point or {@code char} value before {@code index}
1957     *         in {@code seq}.
1958     * @throws NullPointerException
1959     *             if {@code seq} is {@code null}.
1960     * @throws IndexOutOfBoundsException
1961     *             if the {@code index} is less than 1 or greater than the
1962     *             length of {@code seq}.
1963     * @since 1.5
1964     */
1965    public static int codePointBefore(char[] seq, int index) {
1966        if (seq == null) {
1967            throw new NullPointerException();
1968        }
1969        int len = seq.length;
1970        if (index < 1 || index > len) {
1971            throw new IndexOutOfBoundsException();
1972        }
1973
1974        char low = seq[--index];
1975        if (--index < 0) {
1976            return low;
1977        }
1978        char high = seq[index];
1979        if (isSurrogatePair(high, low)) {
1980            return toCodePoint(high, low);
1981        }
1982        return low;
1983    }
1984
1985    /**
1986     * Returns the code point that precedes the {@code index} in the specified
1987     * array of character units and is not less than {@code start}. If the unit
1988     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1989     * less than {@code start} and the unit at {@code index - 2} is a
1990     * high-surrogate unit, then the supplementary code point represented by the
1991     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1992     * is returned.
1993     *
1994     * @param seq
1995     *            the source array of {@code char} units.
1996     * @param index
1997     *            the position in {@code seq} following the code point that
1998     *            should be returned.
1999     * @param start
2000     *            the index of the first element in {@code seq}.
2001     * @return the Unicode code point or {@code char} value before {@code index}
2002     *         in {@code seq}.
2003     * @throws NullPointerException
2004     *             if {@code seq} is {@code null}.
2005     * @throws IndexOutOfBoundsException
2006     *             if the {@code index <= start}, {@code start < 0},
2007     *             {@code index} is greater than the length of {@code seq}, or
2008     *             if {@code start} is equal or greater than the length of
2009     *             {@code seq}.
2010     * @since 1.5
2011     */
2012    public static int codePointBefore(char[] seq, int index, int start) {
2013        if (seq == null) {
2014            throw new NullPointerException();
2015        }
2016        int len = seq.length;
2017        if (index <= start || index > len || start < 0 || start >= len) {
2018            throw new IndexOutOfBoundsException();
2019        }
2020
2021        char low = seq[--index];
2022        if (--index < start) {
2023            return low;
2024        }
2025        char high = seq[index];
2026        if (isSurrogatePair(high, low)) {
2027            return toCodePoint(high, low);
2028        }
2029        return low;
2030    }
2031
2032    /**
2033     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2034     * and copies the value(s) into the char array {@code dst}, starting at
2035     * index {@code dstIndex}.
2036     *
2037     * @param codePoint
2038     *            the Unicode code point to encode.
2039     * @param dst
2040     *            the destination array to copy the encoded value into.
2041     * @param dstIndex
2042     *            the index in {@code dst} from where to start copying.
2043     * @return the number of {@code char} value units copied into {@code dst}.
2044     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2045     * @throws NullPointerException
2046     *             if {@code dst} is {@code null}.
2047     * @throws IndexOutOfBoundsException
2048     *             if {@code dstIndex} is negative, greater than or equal to
2049     *             {@code dst.length} or equals {@code dst.length - 1} when
2050     *             {@code codePoint} is a
2051     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2052     * @since 1.5
2053     */
2054    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2055        checkValidCodePoint(codePoint);
2056        if (dst == null) {
2057            throw new NullPointerException();
2058        }
2059        if (dstIndex < 0 || dstIndex >= dst.length) {
2060            throw new IndexOutOfBoundsException();
2061        }
2062
2063        if (isSupplementaryCodePoint(codePoint)) {
2064            if (dstIndex == dst.length - 1) {
2065                throw new IndexOutOfBoundsException();
2066            }
2067            // See RFC 2781, Section 2.1
2068            // http://www.ietf.org/rfc/rfc2781.txt
2069            int cpPrime = codePoint - 0x10000;
2070            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2071            int low = 0xDC00 | (cpPrime & 0x3FF);
2072            dst[dstIndex] = (char) high;
2073            dst[dstIndex + 1] = (char) low;
2074            return 2;
2075        }
2076
2077        dst[dstIndex] = (char) codePoint;
2078        return 1;
2079    }
2080
2081    /**
2082     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2083     * and returns it as a char array.
2084     *
2085     * @param codePoint
2086     *            the Unicode code point to encode.
2087     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2088     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2089     *         then the returned array contains two characters, otherwise it
2090     *         contains just one character.
2091     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2092     * @since 1.5
2093     */
2094    public static char[] toChars(int codePoint) {
2095        checkValidCodePoint(codePoint);
2096        if (isSupplementaryCodePoint(codePoint)) {
2097            int cpPrime = codePoint - 0x10000;
2098            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2099            int low = 0xDC00 | (cpPrime & 0x3FF);
2100            return new char[] { (char) high, (char) low };
2101        }
2102        return new char[] { (char) codePoint };
2103    }
2104
2105    /**
2106     * Counts the number of Unicode code points in the subsequence of the
2107     * specified character sequence, as delineated by {@code beginIndex} and
2108     * {@code endIndex}. Any surrogate values with missing pair values will be
2109     * counted as one code point.
2110     *
2111     * @param seq
2112     *            the {@code CharSequence} to look through.
2113     * @param beginIndex
2114     *            the inclusive index to begin counting at.
2115     * @param endIndex
2116     *            the exclusive index to stop counting at.
2117     * @return the number of Unicode code points.
2118     * @throws NullPointerException
2119     *             if {@code seq} is {@code null}.
2120     * @throws IndexOutOfBoundsException
2121     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2122     *             if {@code endIndex} is greater than the length of {@code seq}.
2123     * @since 1.5
2124     */
2125    public static int codePointCount(CharSequence seq, int beginIndex,
2126            int endIndex) {
2127        if (seq == null) {
2128            throw new NullPointerException();
2129        }
2130        int len = seq.length();
2131        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2132            throw new IndexOutOfBoundsException();
2133        }
2134
2135        int result = 0;
2136        for (int i = beginIndex; i < endIndex; i++) {
2137            char c = seq.charAt(i);
2138            if (isHighSurrogate(c)) {
2139                if (++i < endIndex) {
2140                    c = seq.charAt(i);
2141                    if (!isLowSurrogate(c)) {
2142                        result++;
2143                    }
2144                }
2145            }
2146            result++;
2147        }
2148        return result;
2149    }
2150
2151    /**
2152     * Counts the number of Unicode code points in the subsequence of the
2153     * specified char array, as delineated by {@code offset} and {@code count}.
2154     * Any surrogate values with missing pair values will be counted as one code
2155     * point.
2156     *
2157     * @param seq
2158     *            the char array to look through
2159     * @param offset
2160     *            the inclusive index to begin counting at.
2161     * @param count
2162     *            the number of {@code char} values to look through in
2163     *            {@code seq}.
2164     * @return the number of Unicode code points.
2165     * @throws NullPointerException
2166     *             if {@code seq} is {@code null}.
2167     * @throws IndexOutOfBoundsException
2168     *             if {@code offset < 0}, {@code count < 0} or if
2169     *             {@code offset + count} is greater than the length of
2170     *             {@code seq}.
2171     * @since 1.5
2172     */
2173    public static int codePointCount(char[] seq, int offset, int count) {
2174        Arrays.checkOffsetAndCount(seq.length, offset, count);
2175        int endIndex = offset + count;
2176        int result = 0;
2177        for (int i = offset; i < endIndex; i++) {
2178            char c = seq[i];
2179            if (isHighSurrogate(c)) {
2180                if (++i < endIndex) {
2181                    c = seq[i];
2182                    if (!isLowSurrogate(c)) {
2183                        result++;
2184                    }
2185                }
2186            }
2187            result++;
2188        }
2189        return result;
2190    }
2191
2192    /**
2193     * Determines the index in the specified character sequence that is offset
2194     * {@code codePointOffset} code points from {@code index}.
2195     *
2196     * @param seq
2197     *            the character sequence to find the index in.
2198     * @param index
2199     *            the start index in {@code seq}.
2200     * @param codePointOffset
2201     *            the number of code points to look backwards or forwards; may
2202     *            be a negative or positive value.
2203     * @return the index in {@code seq} that is {@code codePointOffset} code
2204     *         points away from {@code index}.
2205     * @throws NullPointerException
2206     *             if {@code seq} is {@code null}.
2207     * @throws IndexOutOfBoundsException
2208     *             if {@code index < 0}, {@code index} is greater than the
2209     *             length of {@code seq}, or if there are not enough values in
2210     *             {@code seq} to skip {@code codePointOffset} code points
2211     *             forwards or backwards (if {@code codePointOffset} is
2212     *             negative) from {@code index}.
2213     * @since 1.5
2214     */
2215    public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2216        if (seq == null) {
2217            throw new NullPointerException();
2218        }
2219        int len = seq.length();
2220        if (index < 0 || index > len) {
2221            throw new IndexOutOfBoundsException();
2222        }
2223
2224        if (codePointOffset == 0) {
2225            return index;
2226        }
2227
2228        if (codePointOffset > 0) {
2229            int codePoints = codePointOffset;
2230            int i = index;
2231            while (codePoints > 0) {
2232                codePoints--;
2233                if (i >= len) {
2234                    throw new IndexOutOfBoundsException();
2235                }
2236                if (isHighSurrogate(seq.charAt(i))) {
2237                    int next = i + 1;
2238                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2239                        i++;
2240                    }
2241                }
2242                i++;
2243            }
2244            return i;
2245        }
2246
2247        int codePoints = -codePointOffset;
2248        int i = index;
2249        while (codePoints > 0) {
2250            codePoints--;
2251            i--;
2252            if (i < 0) {
2253                throw new IndexOutOfBoundsException();
2254            }
2255            if (isLowSurrogate(seq.charAt(i))) {
2256                int prev = i - 1;
2257                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2258                    i--;
2259                }
2260            }
2261        }
2262        return i;
2263    }
2264
2265    /**
2266     * Determines the index in a subsequence of the specified character array
2267     * that is offset {@code codePointOffset} code points from {@code index}.
2268     * The subsequence is delineated by {@code start} and {@code count}.
2269     *
2270     * @param seq
2271     *            the character array to find the index in.
2272     * @param start
2273     *            the inclusive index that marks the beginning of the
2274     *            subsequence.
2275     * @param count
2276     *            the number of {@code char} values to include within the
2277     *            subsequence.
2278     * @param index
2279     *            the start index in the subsequence of the char array.
2280     * @param codePointOffset
2281     *            the number of code points to look backwards or forwards; may
2282     *            be a negative or positive value.
2283     * @return the index in {@code seq} that is {@code codePointOffset} code
2284     *         points away from {@code index}.
2285     * @throws NullPointerException
2286     *             if {@code seq} is {@code null}.
2287     * @throws IndexOutOfBoundsException
2288     *             if {@code start < 0}, {@code count < 0},
2289     *             {@code index < start}, {@code index > start + count},
2290     *             {@code start + count} is greater than the length of
2291     *             {@code seq}, or if there are not enough values in
2292     *             {@code seq} to skip {@code codePointOffset} code points
2293     *             forward or backward (if {@code codePointOffset} is
2294     *             negative) from {@code index}.
2295     * @since 1.5
2296     */
2297    public static int offsetByCodePoints(char[] seq, int start, int count,
2298            int index, int codePointOffset) {
2299        Arrays.checkOffsetAndCount(seq.length, start, count);
2300        int end = start + count;
2301        if (index < start || index > end) {
2302            throw new IndexOutOfBoundsException();
2303        }
2304
2305        if (codePointOffset == 0) {
2306            return index;
2307        }
2308
2309        if (codePointOffset > 0) {
2310            int codePoints = codePointOffset;
2311            int i = index;
2312            while (codePoints > 0) {
2313                codePoints--;
2314                if (i >= end) {
2315                    throw new IndexOutOfBoundsException();
2316                }
2317                if (isHighSurrogate(seq[i])) {
2318                    int next = i + 1;
2319                    if (next < end && isLowSurrogate(seq[next])) {
2320                        i++;
2321                    }
2322                }
2323                i++;
2324            }
2325            return i;
2326        }
2327
2328        int codePoints = -codePointOffset;
2329        int i = index;
2330        while (codePoints > 0) {
2331            codePoints--;
2332            i--;
2333            if (i < start) {
2334                throw new IndexOutOfBoundsException();
2335            }
2336            if (isLowSurrogate(seq[i])) {
2337                int prev = i - 1;
2338                if (prev >= start && isHighSurrogate(seq[prev])) {
2339                    i--;
2340                }
2341            }
2342        }
2343        return i;
2344    }
2345
2346    /**
2347     * Convenience method to determine the value of the specified character
2348     * {@code c} in the supplied radix. The value of {@code radix} must be
2349     * between MIN_RADIX and MAX_RADIX.
2350     *
2351     * @param c
2352     *            the character to determine the value of.
2353     * @param radix
2354     *            the radix.
2355     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2356     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2357     */
2358    public static int digit(char c, int radix) {
2359        return digit((int) c, radix);
2360    }
2361
2362    /**
2363     * Convenience method to determine the value of the character
2364     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2365     * be between MIN_RADIX and MAX_RADIX.
2366     *
2367     * @param codePoint
2368     *            the character, including supplementary characters.
2369     * @param radix
2370     *            the radix.
2371     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2372     *         {@link #MAX_RADIX} then the value of the character in the radix;
2373     *         -1 otherwise.
2374     */
2375    public static int digit(int codePoint, int radix) {
2376        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2377            return -1;
2378        }
2379        if (codePoint < 128) {
2380            // Optimized for ASCII
2381            int result = -1;
2382            if ('0' <= codePoint && codePoint <= '9') {
2383                result = codePoint - '0';
2384            } else if ('a' <= codePoint && codePoint <= 'z') {
2385                result = 10 + (codePoint - 'a');
2386            } else if ('A' <= codePoint && codePoint <= 'Z') {
2387                result = 10 + (codePoint - 'A');
2388            }
2389            return result < radix ? result : -1;
2390        }
2391        return digitImpl(codePoint, radix);
2392    }
2393
2394    private static native int digitImpl(int codePoint, int radix);
2395
2396    /**
2397     * Compares this object with the specified object and indicates if they are
2398     * equal. In order to be equal, {@code object} must be an instance of
2399     * {@code Character} and have the same char value as this object.
2400     *
2401     * @param object
2402     *            the object to compare this double with.
2403     * @return {@code true} if the specified object is equal to this
2404     *         {@code Character}; {@code false} otherwise.
2405     */
2406    @Override
2407    public boolean equals(Object object) {
2408        return (object instanceof Character) && (value == ((Character) object).value);
2409    }
2410
2411    /**
2412     * Returns the character which represents the specified digit in the
2413     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2414     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2415     * smaller than {@code radix}. If any of these conditions does not hold, 0
2416     * is returned.
2417     *
2418     * @param digit
2419     *            the integer value.
2420     * @param radix
2421     *            the radix.
2422     * @return the character which represents the {@code digit} in the
2423     *         {@code radix}.
2424     */
2425    public static char forDigit(int digit, int radix) {
2426        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2427            if (digit >= 0 && digit < radix) {
2428                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2429            }
2430        }
2431        return 0;
2432    }
2433
2434    /**
2435     * Returns the name of the given code point, or null if the code point is unassigned.
2436     *
2437     * <p>As a fallback mechanism this method returns strings consisting of the Unicode
2438     * block name (with underscores replaced by spaces), a single space, and the uppercase
2439     * hex value of the code point, using as few digits as necessary.
2440     *
2441     * <p>Examples:
2442     * <ul>
2443     * <li>{@code Character.getName(0)} returns "NULL".
2444     * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
2445     * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
2446     * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
2447     * </ul>
2448     *
2449     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2450     * @since 1.7
2451     * @hide 1.7
2452     */
2453    public static String getName(int codePoint) {
2454        checkValidCodePoint(codePoint);
2455        if (getType(codePoint) == Character.UNASSIGNED) {
2456            return null;
2457        }
2458        String result = getNameImpl(codePoint);
2459        if (result == null) {
2460            String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
2461            result = blockName + " " + IntegralToString.intToHexString(codePoint, true);
2462        }
2463        return result;
2464    }
2465
2466    private static native String getNameImpl(int codePoint);
2467
2468    /**
2469     * Returns the numeric value of the specified Unicode character.
2470     * See {@link #getNumericValue(int)}.
2471     *
2472     * @param c the character
2473     * @return a non-negative numeric integer value if a numeric value for
2474     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2475     *         -2 if the numeric value can not be represented as an integer.
2476     */
2477    public static int getNumericValue(char c) {
2478        return getNumericValue((int) c);
2479    }
2480
2481    /**
2482     * Gets the numeric value of the specified Unicode code point. For example,
2483     * the code point '\u216B' stands for the Roman number XII, which has the
2484     * numeric value 12.
2485     *
2486     * <p>There are two points of divergence between this method and the Unicode
2487     * specification. This method treats the letters a-z (in both upper and lower
2488     * cases, and their full-width variants) as numbers from 10 to 35. The
2489     * Unicode specification also supports the idea of code points with non-integer
2490     * numeric values; this method does not (except to the extent of returning -2
2491     * for such code points).
2492     *
2493     * @param codePoint the code point
2494     * @return a non-negative numeric integer value if a numeric value for
2495     *         {@code codePoint} exists, -1 if there is no numeric value for
2496     *         {@code codePoint}, -2 if the numeric value can not be
2497     *         represented with an integer.
2498     */
2499    public static int getNumericValue(int codePoint) {
2500        // This is both an optimization and papers over differences between Java and ICU.
2501        if (codePoint < 128) {
2502            if (codePoint >= '0' && codePoint <= '9') {
2503                return codePoint - '0';
2504            }
2505            if (codePoint >= 'a' && codePoint <= 'z') {
2506                return codePoint - ('a' - 10);
2507            }
2508            if (codePoint >= 'A' && codePoint <= 'Z') {
2509                return codePoint - ('A' - 10);
2510            }
2511            return -1;
2512        }
2513        // Full-width uppercase A-Z.
2514        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2515            return codePoint - 0xff17;
2516        }
2517        // Full-width lowercase a-z.
2518        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2519            return codePoint - 0xff37;
2520        }
2521        return getNumericValueImpl(codePoint);
2522    }
2523
2524    private static native int getNumericValueImpl(int codePoint);
2525
2526    /**
2527     * Gets the general Unicode category of the specified character.
2528     *
2529     * @param c
2530     *            the character to get the category of.
2531     * @return the Unicode category of {@code c}.
2532     */
2533    public static int getType(char c) {
2534        return getType((int) c);
2535    }
2536
2537    /**
2538     * Gets the general Unicode category of the specified code point.
2539     *
2540     * @param codePoint
2541     *            the Unicode code point to get the category of.
2542     * @return the Unicode category of {@code codePoint}.
2543     */
2544    public static int getType(int codePoint) {
2545        int type = getTypeImpl(codePoint);
2546        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2547        if (type <= Character.FORMAT) {
2548            return type;
2549        }
2550        return (type + 1);
2551    }
2552
2553    private static native int getTypeImpl(int codePoint);
2554
2555    /**
2556     * Gets the Unicode directionality of the specified character.
2557     *
2558     * @param c
2559     *            the character to get the directionality of.
2560     * @return the Unicode directionality of {@code c}.
2561     */
2562    public static byte getDirectionality(char c) {
2563        return getDirectionality((int)c);
2564    }
2565
2566    /**
2567     * Gets the Unicode directionality of the specified character.
2568     *
2569     * @param codePoint
2570     *            the Unicode code point to get the directionality of.
2571     * @return the Unicode directionality of {@code codePoint}.
2572     */
2573    public static byte getDirectionality(int codePoint) {
2574        if (getType(codePoint) == Character.UNASSIGNED) {
2575            return Character.DIRECTIONALITY_UNDEFINED;
2576        }
2577
2578        byte directionality = getDirectionalityImpl(codePoint);
2579        if (directionality == -1) {
2580            return -1;
2581        }
2582        return DIRECTIONALITY[directionality];
2583    }
2584
2585    private static native byte getDirectionalityImpl(int codePoint);
2586
2587    /**
2588     * Indicates whether the specified character is mirrored.
2589     *
2590     * @param c
2591     *            the character to check.
2592     * @return {@code true} if {@code c} is mirrored; {@code false}
2593     *         otherwise.
2594     */
2595    public static boolean isMirrored(char c) {
2596        return isMirrored((int) c);
2597    }
2598
2599    /**
2600     * Indicates whether the specified code point is mirrored.
2601     *
2602     * @param codePoint
2603     *            the code point to check.
2604     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2605     *         otherwise.
2606     */
2607    public static boolean isMirrored(int codePoint) {
2608        return isMirroredImpl(codePoint);
2609    }
2610
2611    private static native boolean isMirroredImpl(int codePoint);
2612
2613    @Override
2614    public int hashCode() {
2615        return value;
2616    }
2617
2618    /**
2619     * Returns the high surrogate for the given code point. The result is meaningless if
2620     * the given code point is not a supplementary character.
2621     * @since 1.7
2622     * @hide 1.7
2623     */
2624    public static char highSurrogate(int codePoint) {
2625        return (char) ((codePoint >> 10) + 0xd7c0);
2626    }
2627
2628    /**
2629     * Returns the low surrogate for the given code point. The result is meaningless if
2630     * the given code point is not a supplementary character.
2631     * @since 1.7
2632     * @hide 1.7
2633     */
2634    public static char lowSurrogate(int codePoint) {
2635        return (char) ((codePoint & 0x3ff) | 0xdc00);
2636    }
2637
2638    /**
2639     * Tests whether the given code point is in the Basic Multilingual Plane (BMP).
2640     * Such code points can be represented by a single {@code char}.
2641     * @since 1.7
2642     * @hide 1.7
2643     */
2644    public static boolean isBmpCodePoint(int codePoint) {
2645        return codePoint >= 0 && codePoint <= 0xffff;
2646    }
2647
2648    /**
2649     * Indicates whether the specified character is defined in the Unicode
2650     * specification.
2651     *
2652     * @param c
2653     *            the character to check.
2654     * @return {@code true} if the general Unicode category of the character is
2655     *         not {@code UNASSIGNED}; {@code false} otherwise.
2656     */
2657    public static boolean isDefined(char c) {
2658        return isDefinedImpl(c);
2659    }
2660
2661    /**
2662     * Indicates whether the specified code point is defined in the Unicode
2663     * specification.
2664     *
2665     * @param codePoint
2666     *            the code point to check.
2667     * @return {@code true} if the general Unicode category of the code point is
2668     *         not {@code UNASSIGNED}; {@code false} otherwise.
2669     */
2670    public static boolean isDefined(int codePoint) {
2671        return isDefinedImpl(codePoint);
2672    }
2673
2674    private static native boolean isDefinedImpl(int codePoint);
2675
2676    /**
2677     * Indicates whether the specified character is a digit.
2678     *
2679     * @param c
2680     *            the character to check.
2681     * @return {@code true} if {@code c} is a digit; {@code false}
2682     *         otherwise.
2683     */
2684    public static boolean isDigit(char c) {
2685        return isDigit((int) c);
2686    }
2687
2688    /**
2689     * Indicates whether the specified code point is a digit.
2690     *
2691     * @param codePoint
2692     *            the code point to check.
2693     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2694     *         otherwise.
2695     */
2696    public static boolean isDigit(int codePoint) {
2697        // Optimized case for ASCII
2698        if ('0' <= codePoint && codePoint <= '9') {
2699            return true;
2700        }
2701        if (codePoint < 1632) {
2702            return false;
2703        }
2704        return isDigitImpl(codePoint);
2705    }
2706
2707    private static native boolean isDigitImpl(int codePoint);
2708
2709    /**
2710     * Indicates whether the specified character is ignorable in a Java or
2711     * Unicode identifier.
2712     *
2713     * @param c
2714     *            the character to check.
2715     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2716     */
2717    public static boolean isIdentifierIgnorable(char c) {
2718        return isIdentifierIgnorable((int) c);
2719    }
2720
2721    /**
2722     * Indicates whether the specified code point is ignorable in a Java or
2723     * Unicode identifier.
2724     *
2725     * @param codePoint
2726     *            the code point to check.
2727     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2728     *         otherwise.
2729     */
2730    public static boolean isIdentifierIgnorable(int codePoint) {
2731        // This is both an optimization and papers over differences between Java and ICU.
2732        if (codePoint < 0x600) {
2733            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2734                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2735        }
2736        return isIdentifierIgnorableImpl(codePoint);
2737    }
2738
2739    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2740
2741    /**
2742     * Indicates whether the specified character is an ISO control character.
2743     *
2744     * @param c
2745     *            the character to check.
2746     * @return {@code true} if {@code c} is an ISO control character;
2747     *         {@code false} otherwise.
2748     */
2749    public static boolean isISOControl(char c) {
2750        return isISOControl((int) c);
2751    }
2752
2753    /**
2754     * Indicates whether the specified code point is an ISO control character.
2755     *
2756     * @param c
2757     *            the code point to check.
2758     * @return {@code true} if {@code c} is an ISO control character;
2759     *         {@code false} otherwise.
2760     */
2761    public static boolean isISOControl(int c) {
2762        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2763    }
2764
2765    /**
2766     * Indicates whether the specified character is a valid part of a Java
2767     * identifier other than the first character.
2768     *
2769     * @param c
2770     *            the character to check.
2771     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2772     *         {@code false} otherwise.
2773     */
2774    public static boolean isJavaIdentifierPart(char c) {
2775        return isJavaIdentifierPart((int) c);
2776    }
2777
2778    /**
2779     * Indicates whether the specified code point is a valid part of a Java
2780     * identifier other than the first character.
2781     *
2782     * @param codePoint
2783     *            the code point to check.
2784     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2785     *         {@code false} otherwise.
2786     */
2787    public static boolean isJavaIdentifierPart(int codePoint) {
2788        // Use precomputed bitmasks to optimize the ASCII range.
2789        if (codePoint < 64) {
2790            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2791        } else if (codePoint < 128) {
2792            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2793        }
2794        int type = getType(codePoint);
2795        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2796                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2797                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2798                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2799                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2800                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2801    }
2802
2803    /**
2804     * Indicates whether the specified character is a valid first character for
2805     * a Java identifier.
2806     *
2807     * @param c
2808     *            the character to check.
2809     * @return {@code true} if {@code c} is a valid first character of a Java
2810     *         identifier; {@code false} otherwise.
2811     */
2812    public static boolean isJavaIdentifierStart(char c) {
2813        return isJavaIdentifierStart((int) c);
2814    }
2815
2816    /**
2817     * Indicates whether the specified code point is a valid first character for
2818     * a Java identifier.
2819     *
2820     * @param codePoint
2821     *            the code point to check.
2822     * @return {@code true} if {@code codePoint} is a valid start of a Java
2823     *         identifier; {@code false} otherwise.
2824     */
2825    public static boolean isJavaIdentifierStart(int codePoint) {
2826        // Use precomputed bitmasks to optimize the ASCII range.
2827        if (codePoint < 64) {
2828            return (codePoint == '$'); // There's only one character in this range.
2829        } else if (codePoint < 128) {
2830            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2831        }
2832        int type = getType(codePoint);
2833        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2834                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2835    }
2836
2837    /**
2838     * Indicates whether the specified character is a Java letter.
2839     *
2840     * @param c
2841     *            the character to check.
2842     * @return {@code true} if {@code c} is a Java letter; {@code false}
2843     *         otherwise.
2844     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2845     */
2846    @Deprecated
2847    public static boolean isJavaLetter(char c) {
2848        return isJavaIdentifierStart(c);
2849    }
2850
2851    /**
2852     * Indicates whether the specified character is a Java letter or digit
2853     * character.
2854     *
2855     * @param c
2856     *            the character to check.
2857     * @return {@code true} if {@code c} is a Java letter or digit;
2858     *         {@code false} otherwise.
2859     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2860     */
2861    @Deprecated
2862    public static boolean isJavaLetterOrDigit(char c) {
2863        return isJavaIdentifierPart(c);
2864    }
2865
2866    /**
2867     * Indicates whether the specified character is a letter.
2868     *
2869     * @param c
2870     *            the character to check.
2871     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2872     */
2873    public static boolean isLetter(char c) {
2874        return isLetter((int) c);
2875    }
2876
2877    /**
2878     * Indicates whether the specified code point is a letter.
2879     *
2880     * @param codePoint
2881     *            the code point to check.
2882     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2883     *         otherwise.
2884     */
2885    public static boolean isLetter(int codePoint) {
2886        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2887            return true;
2888        }
2889        if (codePoint < 128) {
2890            return false;
2891        }
2892        return isLetterImpl(codePoint);
2893    }
2894
2895    private static native boolean isLetterImpl(int codePoint);
2896
2897    /**
2898     * Indicates whether the specified character is a letter or a digit.
2899     *
2900     * @param c
2901     *            the character to check.
2902     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2903     *         otherwise.
2904     */
2905    public static boolean isLetterOrDigit(char c) {
2906        return isLetterOrDigit((int) c);
2907    }
2908
2909    /**
2910     * Indicates whether the specified code point is a letter or a digit.
2911     *
2912     * @param codePoint
2913     *            the code point to check.
2914     * @return {@code true} if {@code codePoint} is a letter or a digit;
2915     *         {@code false} otherwise.
2916     */
2917    public static boolean isLetterOrDigit(int codePoint) {
2918        // Optimized case for ASCII
2919        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2920            return true;
2921        }
2922        if ('0' <= codePoint && codePoint <= '9') {
2923            return true;
2924        }
2925        if (codePoint < 128) {
2926            return false;
2927        }
2928        return isLetterOrDigitImpl(codePoint);
2929    }
2930
2931    private static native boolean isLetterOrDigitImpl(int codePoint);
2932
2933    /**
2934     * Indicates whether the specified character is a lower case letter.
2935     *
2936     * @param c
2937     *            the character to check.
2938     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2939     *         otherwise.
2940     */
2941    public static boolean isLowerCase(char c) {
2942        return isLowerCase((int) c);
2943    }
2944
2945    /**
2946     * Indicates whether the specified code point is a lower case letter.
2947     *
2948     * @param codePoint
2949     *            the code point to check.
2950     * @return {@code true} if {@code codePoint} is a lower case letter;
2951     *         {@code false} otherwise.
2952     */
2953    public static boolean isLowerCase(int codePoint) {
2954        // Optimized case for ASCII
2955        if ('a' <= codePoint && codePoint <= 'z') {
2956            return true;
2957        }
2958        if (codePoint < 128) {
2959            return false;
2960        }
2961        return isLowerCaseImpl(codePoint);
2962    }
2963
2964    private static native boolean isLowerCaseImpl(int codePoint);
2965
2966    /**
2967     * Indicates whether the specified character is a Java space.
2968     *
2969     * @param c
2970     *            the character to check.
2971     * @return {@code true} if {@code c} is a Java space; {@code false}
2972     *         otherwise.
2973     * @deprecated Use {@link #isWhitespace(char)}
2974     */
2975    @Deprecated
2976    public static boolean isSpace(char c) {
2977        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2978    }
2979
2980    /**
2981     * Indicates whether the specified character is a Unicode space character.
2982     * That is, if it is a member of one of the Unicode categories Space
2983     * Separator, Line Separator, or Paragraph Separator.
2984     *
2985     * @param c
2986     *            the character to check.
2987     * @return {@code true} if {@code c} is a Unicode space character,
2988     *         {@code false} otherwise.
2989     */
2990    public static boolean isSpaceChar(char c) {
2991        return isSpaceChar((int) c);
2992    }
2993
2994    /**
2995     * Indicates whether the specified code point is a Unicode space character.
2996     * That is, if it is a member of one of the Unicode categories Space
2997     * Separator, Line Separator, or Paragraph Separator.
2998     *
2999     * @param codePoint
3000     *            the code point to check.
3001     * @return {@code true} if {@code codePoint} is a Unicode space character,
3002     *         {@code false} otherwise.
3003     */
3004    public static boolean isSpaceChar(int codePoint) {
3005        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
3006            return true;
3007        }
3008        if (codePoint < 0x2000) {
3009            return false;
3010        }
3011        if (codePoint <= 0xffff) {
3012            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3013                    codePoint == 0x202f || codePoint == 0x3000;
3014        }
3015        return isSpaceCharImpl(codePoint);
3016    }
3017
3018    private static native boolean isSpaceCharImpl(int codePoint);
3019
3020    /**
3021     * Indicates whether the specified character is a titlecase character.
3022     *
3023     * @param c
3024     *            the character to check.
3025     * @return {@code true} if {@code c} is a titlecase character, {@code false}
3026     *         otherwise.
3027     */
3028    public static boolean isTitleCase(char c) {
3029        return isTitleCaseImpl(c);
3030    }
3031
3032    /**
3033     * Indicates whether the specified code point is a titlecase character.
3034     *
3035     * @param codePoint
3036     *            the code point to check.
3037     * @return {@code true} if {@code codePoint} is a titlecase character,
3038     *         {@code false} otherwise.
3039     */
3040    public static boolean isTitleCase(int codePoint) {
3041        return isTitleCaseImpl(codePoint);
3042    }
3043
3044    private static native boolean isTitleCaseImpl(int codePoint);
3045
3046    /**
3047     * Indicates whether the specified character is valid as part of a Unicode
3048     * identifier other than the first character.
3049     *
3050     * @param c
3051     *            the character to check.
3052     * @return {@code true} if {@code c} is valid as part of a Unicode
3053     *         identifier; {@code false} otherwise.
3054     */
3055    public static boolean isUnicodeIdentifierPart(char c) {
3056        return isUnicodeIdentifierPartImpl(c);
3057    }
3058
3059    /**
3060     * Indicates whether the specified code point is valid as part of a Unicode
3061     * identifier other than the first character.
3062     *
3063     * @param codePoint
3064     *            the code point to check.
3065     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3066     *         identifier; {@code false} otherwise.
3067     */
3068    public static boolean isUnicodeIdentifierPart(int codePoint) {
3069        return isUnicodeIdentifierPartImpl(codePoint);
3070    }
3071
3072    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3073
3074    /**
3075     * Indicates whether the specified character is a valid initial character
3076     * for a Unicode identifier.
3077     *
3078     * @param c
3079     *            the character to check.
3080     * @return {@code true} if {@code c} is a valid first character for a
3081     *         Unicode identifier; {@code false} otherwise.
3082     */
3083    public static boolean isUnicodeIdentifierStart(char c) {
3084        return isUnicodeIdentifierStartImpl(c);
3085    }
3086
3087    /**
3088     * Indicates whether the specified code point is a valid initial character
3089     * for a Unicode identifier.
3090     *
3091     * @param codePoint
3092     *            the code point to check.
3093     * @return {@code true} if {@code codePoint} is a valid first character for
3094     *         a Unicode identifier; {@code false} otherwise.
3095     */
3096    public static boolean isUnicodeIdentifierStart(int codePoint) {
3097        return isUnicodeIdentifierStartImpl(codePoint);
3098    }
3099
3100    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3101
3102    /**
3103     * Indicates whether the specified character is an upper case letter.
3104     *
3105     * @param c
3106     *            the character to check.
3107     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3108     *         otherwise.
3109     */
3110    public static boolean isUpperCase(char c) {
3111        return isUpperCase((int) c);
3112    }
3113
3114    /**
3115     * Indicates whether the specified code point is an upper case letter.
3116     *
3117     * @param codePoint
3118     *            the code point to check.
3119     * @return {@code true} if {@code codePoint} is a upper case letter;
3120     *         {@code false} otherwise.
3121     */
3122    public static boolean isUpperCase(int codePoint) {
3123        // Optimized case for ASCII
3124        if ('A' <= codePoint && codePoint <= 'Z') {
3125            return true;
3126        }
3127        if (codePoint < 128) {
3128            return false;
3129        }
3130        return isUpperCaseImpl(codePoint);
3131    }
3132
3133    private static native boolean isUpperCaseImpl(int codePoint);
3134
3135    /**
3136     * Indicates whether the specified character is a whitespace character in
3137     * Java.
3138     *
3139     * @param c
3140     *            the character to check.
3141     * @return {@code true} if the supplied {@code c} is a whitespace character
3142     *         in Java; {@code false} otherwise.
3143     */
3144    public static boolean isWhitespace(char c) {
3145        return isWhitespace((int) c);
3146    }
3147
3148    /**
3149     * Indicates whether the specified code point is a whitespace character in
3150     * Java.
3151     *
3152     * @param codePoint
3153     *            the code point to check.
3154     * @return {@code true} if the supplied {@code c} is a whitespace character
3155     *         in Java; {@code false} otherwise.
3156     */
3157    public static boolean isWhitespace(int codePoint) {
3158        // This is both an optimization and papers over differences between Java and ICU.
3159        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3160            return true;
3161        }
3162        if (codePoint == 0x1680) {
3163            return true;
3164        }
3165        if (codePoint < 0x2000 || codePoint == 0x2007) {
3166            return false;
3167        }
3168        if (codePoint <= 0xffff) {
3169            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3170                    codePoint == 0x3000;
3171        }
3172        return isWhitespaceImpl(codePoint);
3173    }
3174
3175    private static native boolean isWhitespaceImpl(int codePoint);
3176
3177    /**
3178     * Reverses the order of the first and second byte in the specified
3179     * character.
3180     *
3181     * @param c
3182     *            the character to reverse.
3183     * @return the character with reordered bytes.
3184     */
3185    public static char reverseBytes(char c) {
3186        return (char)((c<<8) | (c>>8));
3187    }
3188
3189    /**
3190     * Returns the lower case equivalent for the specified character if the
3191     * character is an upper case letter. Otherwise, the specified character is
3192     * returned unchanged.
3193     *
3194     * @param c
3195     *            the character
3196     * @return if {@code c} is an upper case character then its lower case
3197     *         counterpart, otherwise just {@code c}.
3198     */
3199    public static char toLowerCase(char c) {
3200        return (char) toLowerCase((int) c);
3201    }
3202
3203    /**
3204     * Returns the lower case equivalent for the specified code point if it is
3205     * an upper case letter. Otherwise, the specified code point is returned
3206     * unchanged.
3207     *
3208     * @param codePoint
3209     *            the code point to check.
3210     * @return if {@code codePoint} is an upper case character then its lower
3211     *         case counterpart, otherwise just {@code codePoint}.
3212     */
3213    public static int toLowerCase(int codePoint) {
3214        // Optimized case for ASCII
3215        if ('A' <= codePoint && codePoint <= 'Z') {
3216            return (char) (codePoint + ('a' - 'A'));
3217        }
3218        if (codePoint < 192) {
3219            return codePoint;
3220        }
3221        return toLowerCaseImpl(codePoint);
3222    }
3223
3224    private static native int toLowerCaseImpl(int codePoint);
3225
3226    @Override
3227    public String toString() {
3228        return String.valueOf(value);
3229    }
3230
3231    /**
3232     * Converts the specified character to its string representation.
3233     *
3234     * @param value
3235     *            the character to convert.
3236     * @return the character converted to a string.
3237     */
3238    public static String toString(char value) {
3239        return String.valueOf(value);
3240    }
3241
3242    /**
3243     * Returns the title case equivalent for the specified character if it
3244     * exists. Otherwise, the specified character is returned unchanged.
3245     *
3246     * @param c
3247     *            the character to convert.
3248     * @return the title case equivalent of {@code c} if it exists, otherwise
3249     *         {@code c}.
3250     */
3251    public static char toTitleCase(char c) {
3252        return (char) toTitleCaseImpl(c);
3253    }
3254
3255    /**
3256     * Returns the title case equivalent for the specified code point if it
3257     * exists. Otherwise, the specified code point is returned unchanged.
3258     *
3259     * @param codePoint
3260     *            the code point to convert.
3261     * @return the title case equivalent of {@code codePoint} if it exists,
3262     *         otherwise {@code codePoint}.
3263     */
3264    public static int toTitleCase(int codePoint) {
3265        return toTitleCaseImpl(codePoint);
3266    }
3267
3268    private static native int toTitleCaseImpl(int codePoint);
3269
3270    /**
3271     * Returns the upper case equivalent for the specified character if the
3272     * character is a lower case letter. Otherwise, the specified character is
3273     * returned unchanged.
3274     *
3275     * @param c
3276     *            the character to convert.
3277     * @return if {@code c} is a lower case character then its upper case
3278     *         counterpart, otherwise just {@code c}.
3279     */
3280    public static char toUpperCase(char c) {
3281        return (char) toUpperCase((int) c);
3282    }
3283
3284    /**
3285     * Returns the upper case equivalent for the specified code point if the
3286     * code point is a lower case letter. Otherwise, the specified code point is
3287     * returned unchanged.
3288     *
3289     * @param codePoint
3290     *            the code point to convert.
3291     * @return if {@code codePoint} is a lower case character then its upper
3292     *         case counterpart, otherwise just {@code codePoint}.
3293     */
3294    public static int toUpperCase(int codePoint) {
3295        // Optimized case for ASCII
3296        if ('a' <= codePoint && codePoint <= 'z') {
3297            return (char) (codePoint - ('a' - 'A'));
3298        }
3299        if (codePoint < 181) {
3300            return codePoint;
3301        }
3302        return toUpperCaseImpl(codePoint);
3303    }
3304
3305    private static native int toUpperCaseImpl(int codePoint);
3306}
3307