Character.java revision b46dab348e2007bc08abaf7ecae34d89a2474e50
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21
22/**
23 * The wrapper for the primitive type {@code char}. This class also provides a
24 * number of utility methods for working with characters.
25 *
26 * <p>Character data is kept up to date as Unicode evolves.
27 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
28 * the {@code Locale} documentation for details of the Unicode versions implemented by current
29 * and historical Android releases.
30 *
31 * <p>The Unicode specification, character tables, and other information are available at
32 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
33 *
34 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
35 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
36 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
37 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
38 * encoding and {@code char} pairs are used to represent code points in the
39 * supplementary range. A pair of {@code char} values that represent a
40 * supplementary character are made up of a <i>high surrogate</i> with a value
41 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
42 * 0xDC00 to 0xDFFF.
43 * <p>
44 * On the Java platform a {@code char} value represents either a single BMP code
45 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
46 * is used to represent all Unicode code points.
47 *
48 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
49 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
50 * grouped semantically to provide a convenient overview. This table is also useful in
51 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
52 * <span class="datatable">
53 * <style type="text/css">
54 * .datatable td { padding-right: 20px; }
55 * </style>
56 * <p><table>
57 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
58 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
59 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
60 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
61 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
62 * <tr> <td><br></td> </tr>
63 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
64 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
65 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
66 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
67 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
68 * <tr> <td><br></td> </tr>
69 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
70 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
71 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
72 * <tr> <td><br></td> </tr>
73 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
74 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
75 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
76 * <tr> <td><br></td> </tr>
77 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
78 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
79 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
84 * <tr> <td><br></td> </tr>
85 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
86 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
87 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
88 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
89 * <tr> <td><br></td> </tr>
90 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
91 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
93 * </table>
94 * </span>
95 *
96 * @since 1.0
97 */
98public final class Character implements Serializable, Comparable<Character> {
    /** Version identifier of this class's serialized form. */
    private static final long serialVersionUID = 3786198910865385080L;

    /** The primitive {@code char} wrapped by this object. */
    private final char value;
102
103    /**
104     * The minimum {@code Character} value.
105     */
106    public static final char MIN_VALUE = '\u0000';
107
108    /**
109     * The maximum {@code Character} value.
110     */
111    public static final char MAX_VALUE = '\uffff';
112
113    /**
114     * The minimum radix used for conversions between characters and integers.
115     */
116    public static final int MIN_RADIX = 2;
117
118    /**
119     * The maximum radix used for conversions between characters and integers.
120     */
121    public static final int MAX_RADIX = 36;
122
123    /**
124     * The {@link Class} object that represents the primitive type {@code char}.
125     */
126    @SuppressWarnings("unchecked")
127    public static final Class<Character> TYPE
128            = (Class<Character>) char[].class.getComponentType();
129
130    // Note: This can't be set to "char.class", since *that* is
131    // defined to be "java.lang.Character.TYPE";
132
133    /**
134     * Unicode category constant Cn.
135     */
136    public static final byte UNASSIGNED = 0;
137
138    /**
139     * Unicode category constant Lu.
140     */
141    public static final byte UPPERCASE_LETTER = 1;
142
143    /**
144     * Unicode category constant Ll.
145     */
146    public static final byte LOWERCASE_LETTER = 2;
147
148    /**
149     * Unicode category constant Lt.
150     */
151    public static final byte TITLECASE_LETTER = 3;
152
153    /**
154     * Unicode category constant Lm.
155     */
156    public static final byte MODIFIER_LETTER = 4;
157
158    /**
159     * Unicode category constant Lo.
160     */
161    public static final byte OTHER_LETTER = 5;
162
163    /**
164     * Unicode category constant Mn.
165     */
166    public static final byte NON_SPACING_MARK = 6;
167
168    /**
169     * Unicode category constant Me.
170     */
171    public static final byte ENCLOSING_MARK = 7;
172
173    /**
174     * Unicode category constant Mc.
175     */
176    public static final byte COMBINING_SPACING_MARK = 8;
177
178    /**
179     * Unicode category constant Nd.
180     */
181    public static final byte DECIMAL_DIGIT_NUMBER = 9;
182
183    /**
184     * Unicode category constant Nl.
185     */
186    public static final byte LETTER_NUMBER = 10;
187
188    /**
189     * Unicode category constant No.
190     */
191    public static final byte OTHER_NUMBER = 11;
192
193    /**
194     * Unicode category constant Zs.
195     */
196    public static final byte SPACE_SEPARATOR = 12;
197
198    /**
199     * Unicode category constant Zl.
200     */
201    public static final byte LINE_SEPARATOR = 13;
202
203    /**
204     * Unicode category constant Zp.
205     */
206    public static final byte PARAGRAPH_SEPARATOR = 14;
207
208    /**
209     * Unicode category constant Cc.
210     */
211    public static final byte CONTROL = 15;
212
213    /**
214     * Unicode category constant Cf.
215     */
216    public static final byte FORMAT = 16;
217
218    /**
219     * Unicode category constant Co.
220     */
221    public static final byte PRIVATE_USE = 18;
222
223    /**
224     * Unicode category constant Cs.
225     */
226    public static final byte SURROGATE = 19;
227
228    /**
229     * Unicode category constant Pd.
230     */
231    public static final byte DASH_PUNCTUATION = 20;
232
233    /**
234     * Unicode category constant Ps.
235     */
236    public static final byte START_PUNCTUATION = 21;
237
238    /**
239     * Unicode category constant Pe.
240     */
241    public static final byte END_PUNCTUATION = 22;
242
243    /**
244     * Unicode category constant Pc.
245     */
246    public static final byte CONNECTOR_PUNCTUATION = 23;
247
248    /**
249     * Unicode category constant Po.
250     */
251    public static final byte OTHER_PUNCTUATION = 24;
252
253    /**
254     * Unicode category constant Sm.
255     */
256    public static final byte MATH_SYMBOL = 25;
257
258    /**
259     * Unicode category constant Sc.
260     */
261    public static final byte CURRENCY_SYMBOL = 26;
262
263    /**
264     * Unicode category constant Sk.
265     */
266    public static final byte MODIFIER_SYMBOL = 27;
267
268    /**
269     * Unicode category constant So.
270     */
271    public static final byte OTHER_SYMBOL = 28;
272
273    /**
274     * Unicode category constant Pi.
275     *
276     * @since 1.4
277     */
278    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
279
280    /**
281     * Unicode category constant Pf.
282     *
283     * @since 1.4
284     */
285    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
286
287    /**
288     * Unicode bidirectional constant.
289     *
290     * @since 1.4
291     */
292    public static final byte DIRECTIONALITY_UNDEFINED = -1;
293
294    /**
295     * Unicode bidirectional constant L.
296     *
297     * @since 1.4
298     */
299    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
300
301    /**
302     * Unicode bidirectional constant R.
303     *
304     * @since 1.4
305     */
306    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
307
308    /**
309     * Unicode bidirectional constant AL.
310     *
311     * @since 1.4
312     */
313    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
314
315    /**
316     * Unicode bidirectional constant EN.
317     *
318     * @since 1.4
319     */
320    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
321
322    /**
323     * Unicode bidirectional constant ES.
324     *
325     * @since 1.4
326     */
327    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
328
329    /**
330     * Unicode bidirectional constant ET.
331     *
332     * @since 1.4
333     */
334    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
335
336    /**
337     * Unicode bidirectional constant AN.
338     *
339     * @since 1.4
340     */
341    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
342
343    /**
344     * Unicode bidirectional constant CS.
345     *
346     * @since 1.4
347     */
348    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
349
350    /**
351     * Unicode bidirectional constant NSM.
352     *
353     * @since 1.4
354     */
355    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
356
357    /**
358     * Unicode bidirectional constant BN.
359     *
360     * @since 1.4
361     */
362    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
363
364    /**
365     * Unicode bidirectional constant B.
366     *
367     * @since 1.4
368     */
369    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
370
371    /**
372     * Unicode bidirectional constant S.
373     *
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
377
378    /**
379     * Unicode bidirectional constant WS.
380     *
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_WHITESPACE = 12;
384
385    /**
386     * Unicode bidirectional constant ON.
387     *
388     * @since 1.4
389     */
390    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
391
392    /**
393     * Unicode bidirectional constant LRE.
394     *
395     * @since 1.4
396     */
397    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
398
399    /**
400     * Unicode bidirectional constant LRO.
401     *
402     * @since 1.4
403     */
404    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
405
406    /**
407     * Unicode bidirectional constant RLE.
408     *
409     * @since 1.4
410     */
411    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
412
413    /**
414     * Unicode bidirectional constant RLO.
415     *
416     * @since 1.4
417     */
418    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
419
420    /**
421     * Unicode bidirectional constant PDF.
422     *
423     * @since 1.4
424     */
425    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
426
427    /**
428     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
429     * encoding, {@code '\uD800'}.
430     *
431     * @since 1.5
432     */
433    public static final char MIN_HIGH_SURROGATE = '\uD800';
434
435    /**
436     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
437     * encoding, {@code '\uDBFF'}.
438     *
439     * @since 1.5
440     */
441    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
442
443    /**
444     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
445     * encoding, {@code '\uDC00'}.
446     *
447     * @since 1.5
448     */
449    public static final char MIN_LOW_SURROGATE = '\uDC00';
450
451    /**
452     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
453     * encoding, {@code '\uDFFF'}.
454     *
455     * @since 1.5
456     */
457    public static final char MAX_LOW_SURROGATE = '\uDFFF';
458
459    /**
460     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
461     *
462     * @since 1.5
463     */
464    public static final char MIN_SURROGATE = '\uD800';
465
466    /**
467     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
468     *
469     * @since 1.5
470     */
471    public static final char MAX_SURROGATE = '\uDFFF';
472
473    /**
474     * The minimum value of a supplementary code point, {@code U+010000}.
475     *
476     * @since 1.5
477     */
478    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
479
480    /**
481     * The minimum code point value, {@code U+0000}.
482     *
483     * @since 1.5
484     */
485    public static final int MIN_CODE_POINT = 0x000000;
486
487    /**
488     * The maximum code point value, {@code U+10FFFF}.
489     *
490     * @since 1.5
491     */
492    public static final int MAX_CODE_POINT = 0x10FFFF;
493
494    /**
495     * The number of bits required to represent a {@code Character} value
496     * unsigned form.
497     *
498     * @since 1.5
499     */
500    public static final int SIZE = 16;
501
502    private static final byte[] DIRECTIONALITY = new byte[] {
503            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
504            DIRECTIONALITY_EUROPEAN_NUMBER,
505            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
506            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
507            DIRECTIONALITY_ARABIC_NUMBER,
508            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
509            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
510            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
511            DIRECTIONALITY_OTHER_NEUTRALS,
512            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
513            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
514            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
515            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
516            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
517            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
518            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
519
520    /*
521     * Represents a subset of the Unicode character set.
522     */
523    public static class Subset {
524        String name;
525
526        /**
527         * Constructs a new {@code Subset}.
528         *
529         * @param string
530         *            this subset's name.
531         */
532        protected Subset(String string) {
533            if (string == null) {
534                throw new NullPointerException();
535            }
536            name = string;
537        }
538
539        /**
540         * Compares this character subset with the specified object. Uses
541         * {@link java.lang.Object#equals(Object)} to do the comparison.
542         *
543         * @param object
544         *            the object to compare this character subset with.
545         * @return {@code true} if {@code object} is this subset, that is, if
546         *         {@code object == this}; {@code false} otherwise.
547         */
548        @Override
549        public final boolean equals(Object object) {
550            return super.equals(object);
551        }
552
553        /**
554         * Returns the integer hash code for this character subset.
555         *
556         * @return this subset's hash code, which is the hash code computed by
557         *         {@link java.lang.Object#hashCode()}.
558         */
559        @Override
560        public final int hashCode() {
561            return super.hashCode();
562        }
563
564        /**
565         * Returns the string representation of this subset.
566         *
567         * @return this subset's name.
568         */
569        @Override
570        public final String toString() {
571            return name;
572        }
573    }
574
575    /**
576     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
577     * specification.
578     *
579     * @since 1.2
580     */
581    public static final class UnicodeBlock extends Subset {
582        /**
583         * The &quot;Surrogates Area&quot; Unicode Block.
584         *
585         * @deprecated As of Java 5, this block has been replaced by
586         *             {@link #HIGH_SURROGATES},
587         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
588         *             {@link #LOW_SURROGATES}.
589         */
590        @Deprecated
591        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
592        /**
593         * The &quot;Basic Latin&quot; Unicode Block.
594         *
595         * @since 1.2
596         */
597        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
598        /**
599         * The &quot;Latin-1 Supplement&quot; Unicode Block.
600         *
601         * @since 1.2
602         */
603        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
604        /**
605         * The &quot;Latin Extended-A&quot; Unicode Block.
606         *
607         * @since 1.2
608         */
609        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
610        /**
611         * The &quot;Latin Extended-B&quot; Unicode Block.
612         *
613         * @since 1.2
614         */
615        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
616        /**
617         * The &quot;IPA Extensions&quot; Unicode Block.
618         *
619         * @since 1.2
620         */
621        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
622        /**
623         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
624         *
625         * @since 1.2
626         */
627        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
628        /**
629         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
630         *
631         * @since 1.2
632         */
633        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
634        /**
635         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
636         * to as &quot;Greek&quot;.
637         *
638         * @since 1.2
639         */
640        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
641        /**
642         * The &quot;Cyrillic&quot; Unicode Block.
643         *
644         * @since 1.2
645         */
646        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
647        /**
648         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
649         * referred to as &quot;Cyrillic Supplementary&quot;.
650         *
651         * @since 1.5
652         */
653        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
654        /**
655         * The &quot;Armenian&quot; Unicode Block.
656         *
657         * @since 1.2
658         */
659        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
660        /**
661         * The &quot;Hebrew&quot; Unicode Block.
662         *
663         * @since 1.2
664         */
665        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
666        /**
667         * The &quot;Arabic&quot; Unicode Block.
668         *
669         * @since 1.2
670         */
671        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
672        /**
673         * The &quot;Syriac&quot; Unicode Block.
674         *
675         * @since 1.4
676         */
677        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
678        /**
679         * The &quot;Thaana&quot; Unicode Block.
680         *
681         * @since 1.4
682         */
683        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
684        /**
685         * The &quot;Devanagari&quot; Unicode Block.
686         *
687         * @since 1.2
688         */
689        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
690        /**
691         * The &quot;Bengali&quot; Unicode Block.
692         *
693         * @since 1.2
694         */
695        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
696        /**
697         * The &quot;Gurmukhi&quot; Unicode Block.
698         *
699         * @since 1.2
700         */
701        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
702        /**
703         * The &quot;Gujarati&quot; Unicode Block.
704         *
705         * @since 1.2
706         */
707        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
708        /**
709         * The &quot;Oriya&quot; Unicode Block.
710         *
711         * @since 1.2
712         */
713        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
714        /**
715         * The &quot;Tamil&quot; Unicode Block.
716         *
717         * @since 1.2
718         */
719        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
720        /**
721         * The &quot;Telugu&quot; Unicode Block.
722         *
723         * @since 1.2
724         */
725        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
726        /**
727         * The &quot;Kannada&quot; Unicode Block.
728         *
729         * @since 1.2
730         */
731        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
732        /**
733         * The &quot;Malayalam&quot; Unicode Block.
734         *
735         * @since 1.2
736         */
737        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
738        /**
739         * The &quot;Sinhala&quot; Unicode Block.
740         *
741         * @since 1.4
742         */
743        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
744        /**
745         * The &quot;Thai&quot; Unicode Block.
746         *
747         * @since 1.2
748         */
749        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
750        /**
751         * The &quot;Lao&quot; Unicode Block.
752         *
753         * @since 1.2
754         */
755        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
756        /**
757         * The &quot;Tibetan&quot; Unicode Block.
758         *
759         * @since 1.2
760         */
761        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
762        /**
763         * The &quot;Myanmar&quot; Unicode Block.
764         *
765         * @since 1.4
766         */
767        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
768        /**
769         * The &quot;Georgian&quot; Unicode Block.
770         *
771         * @since 1.2
772         */
773        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
774        /**
775         * The &quot;Hangul Jamo&quot; Unicode Block.
776         *
777         * @since 1.2
778         */
779        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
780        /**
781         * The &quot;Ethiopic&quot; Unicode Block.
782         *
783         * @since 1.4
784         */
785        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
786        /**
787         * The &quot;Cherokee&quot; Unicode Block.
788         *
789         * @since 1.4
790         */
791        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
792        /**
793         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
794         *
795         * @since 1.4
796         */
797        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
798        /**
799         * The &quot;Ogham&quot; Unicode Block.
800         *
801         * @since 1.4
802         */
803        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
804        /**
805         * The &quot;Runic&quot; Unicode Block.
806         *
807         * @since 1.4
808         */
809        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
810        /**
811         * The &quot;Tagalog&quot; Unicode Block.
812         *
813         * @since 1.5
814         */
815        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
816        /**
817         * The &quot;Hanunoo&quot; Unicode Block.
818         *
819         * @since 1.5
820         */
821        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
822        /**
823         * The &quot;Buhid&quot; Unicode Block.
824         *
825         * @since 1.5
826         */
827        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
828        /**
829         * The &quot;Tagbanwa&quot; Unicode Block.
830         *
831         * @since 1.5
832         */
833        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
834        /**
835         * The &quot;Khmer&quot; Unicode Block.
836         *
837         * @since 1.4
838         */
839        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
840        /**
841         * The &quot;Mongolian&quot; Unicode Block.
842         *
843         * @since 1.4
844         */
845        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
846        /**
847         * The &quot;Limbu&quot; Unicode Block.
848         *
849         * @since 1.5
850         */
851        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
852        /**
853         * The &quot;Tai Le&quot; Unicode Block.
854         *
855         * @since 1.5
856         */
857        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
858        /**
859         * The &quot;Khmer Symbols&quot; Unicode Block.
860         *
861         * @since 1.5
862         */
863        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
864        /**
865         * The &quot;Phonetic Extensions&quot; Unicode Block.
866         *
867         * @since 1.5
868         */
869        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
870        /**
871         * The &quot;Latin Extended Additional&quot; Unicode Block.
872         *
873         * @since 1.2
874         */
875        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
876        /**
877         * The &quot;Greek Extended&quot; Unicode Block.
878         *
879         * @since 1.2
880         */
881        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
882        /**
883         * The &quot;General Punctuation&quot; Unicode Block.
884         *
885         * @since 1.2
886         */
887        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
888        /**
889         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
890         *
891         * @since 1.2
892         */
893        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
894        /**
895         * The &quot;Currency Symbols&quot; Unicode Block.
896         *
897         * @since 1.2
898         */
899        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
900        /**
901         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
902         * Block. Previously referred to as &quot;Combining Marks for
903         * Symbols&quot;.
904         *
905         * @since 1.2
906         */
907        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
908        /**
909         * The &quot;Letterlike Symbols&quot; Unicode Block.
910         *
911         * @since 1.2
912         */
913        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
914        /**
915         * The &quot;Number Forms&quot; Unicode Block.
916         *
917         * @since 1.2
918         */
919        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
920        /**
921         * The &quot;Arrows&quot; Unicode Block.
922         *
923         * @since 1.2
924         */
925        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
926        /**
927         * The &quot;Mathematical Operators&quot; Unicode Block.
928         *
929         * @since 1.2
930         */
931        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
932        /**
933         * The &quot;Miscellaneous Technical&quot; Unicode Block.
934         *
935         * @since 1.2
936         */
937        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
938        /**
939         * The &quot;Control Pictures&quot; Unicode Block.
940         *
941         * @since 1.2
942         */
943        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
944        /**
945         * The &quot;Optical Character Recognition&quot; Unicode Block.
946         *
947         * @since 1.2
948         */
949        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
950        /**
951         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
952         *
953         * @since 1.2
954         */
955        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
956        /**
957         * The &quot;Box Drawing&quot; Unicode Block.
958         *
959         * @since 1.2
960         */
961        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
962        /**
963         * The &quot;Block Elements&quot; Unicode Block.
964         *
965         * @since 1.2
966         */
967        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
968        /**
969         * The &quot;Geometric Shapes&quot; Unicode Block.
970         *
971         * @since 1.2
972         */
973        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
974        /**
975         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
976         *
977         * @since 1.2
978         */
979        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
980        /**
981         * The &quot;Dingbats&quot; Unicode Block.
982         *
983         * @since 1.2
984         */
985        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
986        /**
987         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
988         *
989         * @since 1.5
990         */
991        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
992        /**
993         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
994         *
995         * @since 1.5
996         */
997        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
998        /**
999         * The &quot;Braille Patterns&quot; Unicode Block.
1000         *
1001         * @since 1.4
1002         */
1003        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1004        /**
1005         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1006         *
1007         * @since 1.5
1008         */
1009        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1010        /**
1011         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1012         *
1013         * @since 1.5
1014         */
1015        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1016        /**
1017         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1018         *
1019         * @since 1.5
1020         */
1021        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
1022        /**
1023         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
1024         *
         * @since 1.5
1026         */
1027        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1028        /**
1029         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1030         *
1031         * @since 1.4
1032         */
1033        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1034        /**
1035         * The &quot;Kangxi Radicals&quot; Unicode Block.
1036         *
1037         * @since 1.4
1038         */
1039        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1040        /**
1041         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1042         *
1043         * @since 1.4
1044         */
1045        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1046        /**
1047         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1048         *
1049         * @since 1.2
1050         */
1051        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1052        /**
1053         * The &quot;Hiragana&quot; Unicode Block.
1054         *
1055         * @since 1.2
1056         */
1057        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1058        /**
1059         * The &quot;Katakana&quot; Unicode Block.
1060         *
1061         * @since 1.2
1062         */
1063        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1064        /**
1065         * The &quot;Bopomofo&quot; Unicode Block.
1066         *
1067         * @since 1.2
1068         */
1069        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1070        /**
1071         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1072         *
1073         * @since 1.2
1074         */
1075        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1076        /**
1077         * The &quot;Kanbun&quot; Unicode Block.
1078         *
1079         * @since 1.2
1080         */
1081        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1082        /**
1083         * The &quot;Bopomofo Extended&quot; Unicode Block.
1084         *
1085         * @since 1.4
1086         */
1087        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1088        /**
1089         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1090         *
1091         * @since 1.5
1092         */
1093        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1094        /**
1095         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1096         *
1097         * @since 1.2
1098         */
1099        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1100        /**
1101         * The &quot;CJK Compatibility&quot; Unicode Block.
1102         *
1103         * @since 1.2
1104         */
1105        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1106        /**
1107         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1108         *
1109         * @since 1.4
1110         */
1111        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1112        /**
1113         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1114         *
1115         * @since 1.5
1116         */
1117        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1118        /**
1119         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1120         *
1121         * @since 1.2
1122         */
1123        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1124        /**
1125         * The &quot;Yi Syllables&quot; Unicode Block.
1126         *
1127         * @since 1.4
1128         */
1129        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1130        /**
1131         * The &quot;Yi Radicals&quot; Unicode Block.
1132         *
1133         * @since 1.4
1134         */
1135        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1136        /**
1137         * The &quot;Hangul Syllables&quot; Unicode Block.
1138         *
1139         * @since 1.2
1140         */
1141        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
1142        /**
         * The &quot;High Surrogates&quot; Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         *
         * @since 1.5
1145         */
1146        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
1147        /**
         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         *
         * @since 1.5
1151         */
1152        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
1153        /**
         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         *
         * @since 1.5
1156         */
1157        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1158        /**
1159         * The &quot;Private Use Area&quot; Unicode Block.
1160         *
1161         * @since 1.2
1162         */
1163        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1164        /**
1165         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1166         *
1167         * @since 1.2
1168         */
1169        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1170        /**
1171         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1172         *
1173         * @since 1.2
1174         */
1175        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1176        /**
1177         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1178         *
1179         * @since 1.2
1180         */
1181        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1182        /**
1183         * The &quot;Variation Selectors&quot; Unicode Block.
1184         *
1185         * @since 1.5
1186         */
1187        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1188        /**
1189         * The &quot;Combining Half Marks&quot; Unicode Block.
1190         *
1191         * @since 1.2
1192         */
1193        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1194        /**
1195         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1196         *
1197         * @since 1.2
1198         */
1199        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1200        /**
1201         * The &quot;Small Form Variants&quot; Unicode Block.
1202         *
1203         * @since 1.2
1204         */
1205        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1206        /**
1207         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1208         *
1209         * @since 1.2
1210         */
1211        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1212        /**
1213         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1214         *
1215         * @since 1.2
1216         */
1217        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1218        /**
1219         * The &quot;Specials&quot; Unicode Block.
1220         *
1221         * @since 1.2
1222         */
1223        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
1224        /**
1225         * The &quot;Linear B Syllabary&quot; Unicode Block.
1226         *
         * @since 1.5
1228         */
1229        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1230        /**
1231         * The &quot;Linear B Ideograms&quot; Unicode Block.
1232         *
1233         * @since 1.5
1234         */
1235        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1236        /**
1237         * The &quot;Aegean Numbers&quot; Unicode Block.
1238         *
1239         * @since 1.5
1240         */
1241        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1242        /**
1243         * The &quot;Old Italic&quot; Unicode Block.
1244         *
1245         * @since 1.5
1246         */
1247        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1248        /**
1249         * The &quot;Gothic&quot; Unicode Block.
1250         *
1251         * @since 1.5
1252         */
1253        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1254        /**
1255         * The &quot;Ugaritic&quot; Unicode Block.
1256         *
1257         * @since 1.5
1258         */
1259        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1260        /**
1261         * The &quot;Deseret&quot; Unicode Block.
1262         *
1263         * @since 1.5
1264         */
1265        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1266        /**
1267         * The &quot;Shavian&quot; Unicode Block.
1268         *
1269         * @since 1.5
1270         */
1271        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1272        /**
1273         * The &quot;Osmanya&quot; Unicode Block.
1274         *
1275         * @since 1.5
1276         */
1277        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1278        /**
1279         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1280         *
1281         * @since 1.5
1282         */
1283        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1284        /**
1285         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1286         *
1287         * @since 1.5
1288         */
1289        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1290        /**
1291         * The &quot;Musical Symbols&quot; Unicode Block.
1292         *
1293         * @since 1.5
1294         */
1295        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1296        /**
1297         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1298         *
1299         * @since 1.5
1300         */
1301        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1302        /**
1303         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1304         *
1305         * @since 1.5
1306         */
1307        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1308        /**
1309         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1310         *
1311         * @since 1.5
1312         */
1313        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1314        /**
1315         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1316         *
1317         * @since 1.5
1318         */
1319        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1320        /**
1321         * The &quot;Tags&quot; Unicode Block.
1322         *
1323         * @since 1.5
1324         */
1325        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1326        /**
1327         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1328         *
1329         * @since 1.5
1330         */
1331        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1332        /**
1333         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1334         *
1335         * @since 1.5
1336         */
1337        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1338        /**
1339         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1340         *
1341         * @since 1.5
1342         */
1343        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1344
1345        /*
1346         * All of the UnicodeBlocks with valid ranges in ascending order.
1347         */
1348        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1349            null,
1350            UnicodeBlock.BASIC_LATIN,
1351            UnicodeBlock.LATIN_1_SUPPLEMENT,
1352            UnicodeBlock.LATIN_EXTENDED_A,
1353            UnicodeBlock.LATIN_EXTENDED_B,
1354            UnicodeBlock.IPA_EXTENSIONS,
1355            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1356            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1357            UnicodeBlock.GREEK,
1358            UnicodeBlock.CYRILLIC,
1359            UnicodeBlock.ARMENIAN,
1360            UnicodeBlock.HEBREW,
1361            UnicodeBlock.ARABIC,
1362            UnicodeBlock.SYRIAC,
1363            UnicodeBlock.THAANA,
1364            UnicodeBlock.DEVANAGARI,
1365            UnicodeBlock.BENGALI,
1366            UnicodeBlock.GURMUKHI,
1367            UnicodeBlock.GUJARATI,
1368            UnicodeBlock.ORIYA,
1369            UnicodeBlock.TAMIL,
1370            UnicodeBlock.TELUGU,
1371            UnicodeBlock.KANNADA,
1372            UnicodeBlock.MALAYALAM,
1373            UnicodeBlock.SINHALA,
1374            UnicodeBlock.THAI,
1375            UnicodeBlock.LAO,
1376            UnicodeBlock.TIBETAN,
1377            UnicodeBlock.MYANMAR,
1378            UnicodeBlock.GEORGIAN,
1379            UnicodeBlock.HANGUL_JAMO,
1380            UnicodeBlock.ETHIOPIC,
1381            UnicodeBlock.CHEROKEE,
1382            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1383            UnicodeBlock.OGHAM,
1384            UnicodeBlock.RUNIC,
1385            UnicodeBlock.KHMER,
1386            UnicodeBlock.MONGOLIAN,
1387            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1388            UnicodeBlock.GREEK_EXTENDED,
1389            UnicodeBlock.GENERAL_PUNCTUATION,
1390            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1391            UnicodeBlock.CURRENCY_SYMBOLS,
1392            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1393            UnicodeBlock.LETTERLIKE_SYMBOLS,
1394            UnicodeBlock.NUMBER_FORMS,
1395            UnicodeBlock.ARROWS,
1396            UnicodeBlock.MATHEMATICAL_OPERATORS,
1397            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1398            UnicodeBlock.CONTROL_PICTURES,
1399            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1400            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1401            UnicodeBlock.BOX_DRAWING,
1402            UnicodeBlock.BLOCK_ELEMENTS,
1403            UnicodeBlock.GEOMETRIC_SHAPES,
1404            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1405            UnicodeBlock.DINGBATS,
1406            UnicodeBlock.BRAILLE_PATTERNS,
1407            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1408            UnicodeBlock.KANGXI_RADICALS,
1409            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1410            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1411            UnicodeBlock.HIRAGANA,
1412            UnicodeBlock.KATAKANA,
1413            UnicodeBlock.BOPOMOFO,
1414            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1415            UnicodeBlock.KANBUN,
1416            UnicodeBlock.BOPOMOFO_EXTENDED,
1417            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1418            UnicodeBlock.CJK_COMPATIBILITY,
1419            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1420            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1421            UnicodeBlock.YI_SYLLABLES,
1422            UnicodeBlock.YI_RADICALS,
1423            UnicodeBlock.HANGUL_SYLLABLES,
1424            UnicodeBlock.HIGH_SURROGATES,
1425            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1426            UnicodeBlock.LOW_SURROGATES,
1427            UnicodeBlock.PRIVATE_USE_AREA,
1428            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1429            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1430            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1431            UnicodeBlock.COMBINING_HALF_MARKS,
1432            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1433            UnicodeBlock.SMALL_FORM_VARIANTS,
1434            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1435            UnicodeBlock.SPECIALS,
1436            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1437            UnicodeBlock.OLD_ITALIC,
1438            UnicodeBlock.GOTHIC,
1439            UnicodeBlock.DESERET,
1440            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1441            UnicodeBlock.MUSICAL_SYMBOLS,
1442            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1443            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1444            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1445            UnicodeBlock.TAGS,
1446            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1447            UnicodeBlock.TAGALOG,
1448            UnicodeBlock.HANUNOO,
1449            UnicodeBlock.BUHID,
1450            UnicodeBlock.TAGBANWA,
1451            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1452            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1453            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1454            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1455            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1456            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1457            UnicodeBlock.VARIATION_SELECTORS,
1458            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1459            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1460            UnicodeBlock.LIMBU,
1461            UnicodeBlock.TAI_LE,
1462            UnicodeBlock.KHMER_SYMBOLS,
1463            UnicodeBlock.PHONETIC_EXTENSIONS,
1464            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1465            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1466            UnicodeBlock.LINEAR_B_SYLLABARY,
1467            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1468            UnicodeBlock.AEGEAN_NUMBERS,
1469            UnicodeBlock.UGARITIC,
1470            UnicodeBlock.SHAVIAN,
1471            UnicodeBlock.OSMANYA,
1472            UnicodeBlock.CYPRIOT_SYLLABARY,
1473            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1474            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
1475        };
1476
1477        /**
1478         * Retrieves the constant that corresponds to the specified block name.
1479         * The block names are defined by the Unicode 4.0.1 specification in the
1480         * {@code Blocks-4.0.1.txt} file.
1481         * <p>
1482         * Block names may be one of the following:
1483         * <ul>
1484         * <li>Canonical block name, as defined by the Unicode specification;
1485         * case-insensitive.</li>
1486         * <li>Canonical block name without any spaces, as defined by the
1487         * Unicode specification; case-insensitive.</li>
1488         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1489         * uppercasing the canonical name and replacing all spaces and hyphens
1490         * with underscores.</li>
1491         * </ul>
1492         *
1493         * @param blockName
1494         *            the name of the block to retrieve.
1495         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1496         * @throws NullPointerException
1497         *             if {@code blockName} is {@code null}.
1498         * @throws IllegalArgumentException
1499         *             if {@code blockName} is not a valid block name.
1500         * @since 1.5
1501         */
1502        public static UnicodeBlock forName(String blockName) {
1503            if (blockName == null) {
1504                throw new NullPointerException();
1505            }
1506            int block = forNameImpl(blockName);
1507            if (block == -1) {
1508                if (blockName.equals("SURROGATES_AREA")) {
1509                    return SURROGATES_AREA;
1510                } else if(blockName.equalsIgnoreCase("greek")) {
1511                    return GREEK;
1512                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1513                        blockName.equals("Combining Marks for Symbols") ||
1514                        blockName.equals("CombiningMarksforSymbols")) {
1515                    return COMBINING_MARKS_FOR_SYMBOLS;
1516                }
1517                throw new IllegalArgumentException();
1518            }
1519            return BLOCKS[block];
1520        }
1521
1522        /**
1523         * Gets the constant for the Unicode block that contains the specified
1524         * character.
1525         *
1526         * @param c
1527         *            the character for which to get the {@code UnicodeBlock}
1528         *            constant.
1529         * @return the {@code UnicodeBlock} constant for the block that contains
1530         *         {@code c}, or {@code null} if {@code c} does not belong to
1531         *         any defined block.
1532         */
1533        public static UnicodeBlock of(char c) {
1534            return of((int) c);
1535        }
1536
1537        /**
1538         * Gets the constant for the Unicode block that contains the specified
1539         * Unicode code point.
1540         *
1541         * @param codePoint
1542         *            the Unicode code point for which to get the
1543         *            {@code UnicodeBlock} constant.
1544         * @return the {@code UnicodeBlock} constant for the block that contains
1545         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1546         *         not belong to any defined block.
1547         * @throws IllegalArgumentException
1548         *             if {@code codePoint} is not a valid Unicode code point.
1549         * @since 1.5
1550         */
1551        public static UnicodeBlock of(int codePoint) {
1552            if (!isValidCodePoint(codePoint)) {
1553                throw new IllegalArgumentException();
1554            }
1555            int block = ofImpl(codePoint);
1556            if (block == -1 || block >= BLOCKS.length) {
1557                return null;
1558            }
1559            return BLOCKS[block];
1560        }
1561
        /**
         * Creates a block constant with the given name. Only
         * {@code blockName} is used: it is passed to the superclass
         * constructor. The {@code start} and {@code end} arguments are
         * accepted but neither stored nor checked here.
         *
         * @param blockName
         *            the name handed to the superclass constructor.
         * @param start
         *            first code point of the block; currently unused.
         * @param end
         *            last code point of the block; currently unused.
         */
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
1565    }
1566
    /**
     * Native lookup of a Unicode block by name. {@code UnicodeBlock.forName}
     * treats a result of {@code -1} as "name not recognised" and uses any
     * other result as an index into its table of block constants.
     */
    private static native int forNameImpl(String blockName);

    /**
     * Native lookup of the Unicode block containing a code point.
     * {@code UnicodeBlock.of(int)} treats a result of {@code -1}, or one at
     * or beyond the end of its table of block constants, as "no block" and
     * uses any other result as an index into that table.
     */
    private static native int ofImpl(int codePoint);
1570
1571    /**
1572     * Constructs a new {@code Character} with the specified primitive char
1573     * value.
1574     *
1575     * @param value
1576     *            the primitive char value to store in the new instance.
1577     */
1578    public Character(char value) {
1579        this.value = value;
1580    }
1581
1582    /**
1583     * Gets the primitive value of this character.
1584     *
1585     * @return this object's primitive value.
1586     */
1587    public char charValue() {
1588        return value;
1589    }
1590
1591    /**
1592     * Compares this object to the specified character object to determine their
1593     * relative order.
1594     *
1595     * @param c
1596     *            the character object to compare this object to.
1597     * @return {@code 0} if the value of this character and the value of
1598     *         {@code c} are equal; a positive value if the value of this
1599     *         character is greater than the value of {@code c}; a negative
1600     *         value if the value of this character is less than the value of
1601     *         {@code c}.
1602     * @see java.lang.Comparable
1603     * @since 1.2
1604     */
1605    public int compareTo(Character c) {
1606        return value - c.value;
1607    }
1608
1609    /**
1610     * Returns a {@code Character} instance for the {@code char} value passed.
1611     * <p>
1612     * If it is not necessary to get a new {@code Character} instance, it is
1613     * recommended to use this method instead of the constructor, since it
1614     * maintains a cache of instances which may result in better performance.
1615     *
1616     * @param c
1617     *            the char value for which to get a {@code Character} instance.
1618     * @return the {@code Character} instance for {@code c}.
1619     * @since 1.5
1620     */
1621    public static Character valueOf(char c) {
1622        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1623    }
1624
1625    /**
1626     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1627     */
1628    private static final Character[] SMALL_VALUES = new Character[128];
1629
1630    static {
1631        for(int i = 0; i < 128; i++) {
1632            SMALL_VALUES[i] = new Character((char) i);
1633        }
1634    }
1635    /**
1636     * Indicates whether {@code codePoint} is a valid Unicode code point.
1637     *
1638     * @param codePoint
1639     *            the code point to test.
1640     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1641     *         {@code false} otherwise.
1642     * @since 1.5
1643     */
1644    public static boolean isValidCodePoint(int codePoint) {
1645        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1646    }
1647
1648    /**
1649     * Indicates whether {@code codePoint} is within the supplementary code
1650     * point range.
1651     *
1652     * @param codePoint
1653     *            the code point to test.
1654     * @return {@code true} if {@code codePoint} is within the supplementary
1655     *         code point range; {@code false} otherwise.
1656     * @since 1.5
1657     */
1658    public static boolean isSupplementaryCodePoint(int codePoint) {
1659        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1660    }
1661
1662    /**
1663     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1664     * that is used for representing supplementary characters in UTF-16
1665     * encoding.
1666     *
1667     * @param ch
1668     *            the character to test.
1669     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1670     *         {@code false} otherwise.
1671     * @see #isLowSurrogate(char)
1672     * @since 1.5
1673     */
1674    public static boolean isHighSurrogate(char ch) {
1675        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1676    }
1677
1678    /**
1679     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1680     * that is used for representing supplementary characters in UTF-16
1681     * encoding.
1682     *
1683     * @param ch
1684     *            the character to test.
1685     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1686     *         {@code false} otherwise.
1687     * @see #isHighSurrogate(char)
1688     * @since 1.5
1689     */
1690    public static boolean isLowSurrogate(char ch) {
1691        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1692    }
1693
1694    /**
1695     * Indicates whether the specified character pair is a valid surrogate pair.
1696     *
1697     * @param high
1698     *            the high surrogate unit to test.
1699     * @param low
1700     *            the low surrogate unit to test.
1701     * @return {@code true} if {@code high} is a high-surrogate code unit and
1702     *         {@code low} is a low-surrogate code unit; {@code false}
1703     *         otherwise.
1704     * @see #isHighSurrogate(char)
1705     * @see #isLowSurrogate(char)
1706     * @since 1.5
1707     */
1708    public static boolean isSurrogatePair(char high, char low) {
1709        return (isHighSurrogate(high) && isLowSurrogate(low));
1710    }
1711
1712    /**
1713     * Calculates the number of {@code char} values required to represent the
1714     * specified Unicode code point. This method checks if the {@code codePoint}
1715     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1716     * returned, otherwise {@code 1}. To test if the code point is valid, use
1717     * the {@link #isValidCodePoint(int)} method.
1718     *
1719     * @param codePoint
1720     *            the code point for which to calculate the number of required
1721     *            chars.
1722     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1723     * @see #isValidCodePoint(int)
1724     * @see #isSupplementaryCodePoint(int)
1725     * @since 1.5
1726     */
1727    public static int charCount(int codePoint) {
1728        return (codePoint >= 0x10000 ? 2 : 1);
1729    }
1730
1731    /**
1732     * Converts a surrogate pair into a Unicode code point. This method assumes
1733     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1734     * surrogates, then the result is indeterminate. The
1735     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1736     * method to validate the pair.
1737     *
1738     * @param high
1739     *            the high surrogate unit.
1740     * @param low
1741     *            the low surrogate unit.
1742     * @return the Unicode code point corresponding to the surrogate unit pair.
1743     * @see #isSurrogatePair(char, char)
1744     * @since 1.5
1745     */
1746    public static int toCodePoint(char high, char low) {
1747        // See RFC 2781, Section 2.2
1748        // http://www.ietf.org/rfc/rfc2781.txt
1749        int h = (high & 0x3FF) << 10;
1750        int l = low & 0x3FF;
1751        return (h | l) + 0x10000;
1752    }
1753
1754    /**
1755     * Returns the code point at {@code index} in the specified sequence of
1756     * character units. If the unit at {@code index} is a high-surrogate unit,
1757     * {@code index + 1} is less than the length of the sequence and the unit at
1758     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1759     * point represented by the pair is returned; otherwise the {@code char}
1760     * value at {@code index} is returned.
1761     *
1762     * @param seq
1763     *            the source sequence of {@code char} units.
1764     * @param index
1765     *            the position in {@code seq} from which to retrieve the code
1766     *            point.
1767     * @return the Unicode code point or {@code char} value at {@code index} in
1768     *         {@code seq}.
1769     * @throws NullPointerException
1770     *             if {@code seq} is {@code null}.
1771     * @throws IndexOutOfBoundsException
1772     *             if the {@code index} is negative or greater than or equal to
1773     *             the length of {@code seq}.
1774     * @since 1.5
1775     */
1776    public static int codePointAt(CharSequence seq, int index) {
1777        if (seq == null) {
1778            throw new NullPointerException();
1779        }
1780        int len = seq.length();
1781        if (index < 0 || index >= len) {
1782            throw new IndexOutOfBoundsException();
1783        }
1784
1785        char high = seq.charAt(index++);
1786        if (index >= len) {
1787            return high;
1788        }
1789        char low = seq.charAt(index);
1790        if (isSurrogatePair(high, low)) {
1791            return toCodePoint(high, low);
1792        }
1793        return high;
1794    }
1795
1796    /**
1797     * Returns the code point at {@code index} in the specified array of
1798     * character units. If the unit at {@code index} is a high-surrogate unit,
1799     * {@code index + 1} is less than the length of the array and the unit at
1800     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1801     * point represented by the pair is returned; otherwise the {@code char}
1802     * value at {@code index} is returned.
1803     *
1804     * @param seq
1805     *            the source array of {@code char} units.
1806     * @param index
1807     *            the position in {@code seq} from which to retrieve the code
1808     *            point.
1809     * @return the Unicode code point or {@code char} value at {@code index} in
1810     *         {@code seq}.
1811     * @throws NullPointerException
1812     *             if {@code seq} is {@code null}.
1813     * @throws IndexOutOfBoundsException
1814     *             if the {@code index} is negative or greater than or equal to
1815     *             the length of {@code seq}.
1816     * @since 1.5
1817     */
1818    public static int codePointAt(char[] seq, int index) {
1819        if (seq == null) {
1820            throw new NullPointerException();
1821        }
1822        int len = seq.length;
1823        if (index < 0 || index >= len) {
1824            throw new IndexOutOfBoundsException();
1825        }
1826
1827        char high = seq[index++];
1828        if (index >= len) {
1829            return high;
1830        }
1831        char low = seq[index];
1832        if (isSurrogatePair(high, low)) {
1833            return toCodePoint(high, low);
1834        }
1835        return high;
1836    }
1837
1838    /**
1839     * Returns the code point at {@code index} in the specified array of
1840     * character units, where {@code index} has to be less than {@code limit}.
1841     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1842     * is less than {@code limit} and the unit at {@code index + 1} is a
1843     * low-surrogate unit, then the supplementary code point represented by the
1844     * pair is returned; otherwise the {@code char} value at {@code index} is
1845     * returned.
1846     *
1847     * @param seq
1848     *            the source array of {@code char} units.
1849     * @param index
1850     *            the position in {@code seq} from which to get the code point.
1851     * @param limit
1852     *            the index after the last unit in {@code seq} that can be used.
1853     * @return the Unicode code point or {@code char} value at {@code index} in
1854     *         {@code seq}.
1855     * @throws NullPointerException
1856     *             if {@code seq} is {@code null}.
1857     * @throws IndexOutOfBoundsException
1858     *             if {@code index < 0}, {@code index >= limit},
1859     *             {@code limit < 0} or if {@code limit} is greater than the
1860     *             length of {@code seq}.
1861     * @since 1.5
1862     */
1863    public static int codePointAt(char[] seq, int index, int limit) {
1864        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1865            throw new IndexOutOfBoundsException();
1866        }
1867
1868        char high = seq[index++];
1869        if (index >= limit) {
1870            return high;
1871        }
1872        char low = seq[index];
1873        if (isSurrogatePair(high, low)) {
1874            return toCodePoint(high, low);
1875        }
1876        return high;
1877    }
1878
1879    /**
1880     * Returns the code point that precedes {@code index} in the specified
1881     * sequence of character units. If the unit at {@code index - 1} is a
1882     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1883     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1884     * point represented by the pair is returned; otherwise the {@code char}
1885     * value at {@code index - 1} is returned.
1886     *
1887     * @param seq
1888     *            the source sequence of {@code char} units.
1889     * @param index
1890     *            the position in {@code seq} following the code
1891     *            point that should be returned.
1892     * @return the Unicode code point or {@code char} value before {@code index}
1893     *         in {@code seq}.
1894     * @throws NullPointerException
1895     *             if {@code seq} is {@code null}.
1896     * @throws IndexOutOfBoundsException
1897     *             if the {@code index} is less than 1 or greater than the
1898     *             length of {@code seq}.
1899     * @since 1.5
1900     */
1901    public static int codePointBefore(CharSequence seq, int index) {
1902        if (seq == null) {
1903            throw new NullPointerException();
1904        }
1905        int len = seq.length();
1906        if (index < 1 || index > len) {
1907            throw new IndexOutOfBoundsException();
1908        }
1909
1910        char low = seq.charAt(--index);
1911        if (--index < 0) {
1912            return low;
1913        }
1914        char high = seq.charAt(index);
1915        if (isSurrogatePair(high, low)) {
1916            return toCodePoint(high, low);
1917        }
1918        return low;
1919    }
1920
1921    /**
1922     * Returns the code point that precedes {@code index} in the specified
1923     * array of character units. If the unit at {@code index - 1} is a
1924     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1925     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1926     * point represented by the pair is returned; otherwise the {@code char}
1927     * value at {@code index - 1} is returned.
1928     *
1929     * @param seq
1930     *            the source array of {@code char} units.
1931     * @param index
1932     *            the position in {@code seq} following the code
1933     *            point that should be returned.
1934     * @return the Unicode code point or {@code char} value before {@code index}
1935     *         in {@code seq}.
1936     * @throws NullPointerException
1937     *             if {@code seq} is {@code null}.
1938     * @throws IndexOutOfBoundsException
1939     *             if the {@code index} is less than 1 or greater than the
1940     *             length of {@code seq}.
1941     * @since 1.5
1942     */
1943    public static int codePointBefore(char[] seq, int index) {
1944        if (seq == null) {
1945            throw new NullPointerException();
1946        }
1947        int len = seq.length;
1948        if (index < 1 || index > len) {
1949            throw new IndexOutOfBoundsException();
1950        }
1951
1952        char low = seq[--index];
1953        if (--index < 0) {
1954            return low;
1955        }
1956        char high = seq[index];
1957        if (isSurrogatePair(high, low)) {
1958            return toCodePoint(high, low);
1959        }
1960        return low;
1961    }
1962
1963    /**
1964     * Returns the code point that precedes the {@code index} in the specified
1965     * array of character units and is not less than {@code start}. If the unit
1966     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1967     * less than {@code start} and the unit at {@code index - 2} is a
1968     * high-surrogate unit, then the supplementary code point represented by the
1969     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1970     * is returned.
1971     *
1972     * @param seq
1973     *            the source array of {@code char} units.
1974     * @param index
1975     *            the position in {@code seq} following the code point that
1976     *            should be returned.
1977     * @param start
1978     *            the index of the first element in {@code seq}.
1979     * @return the Unicode code point or {@code char} value before {@code index}
1980     *         in {@code seq}.
1981     * @throws NullPointerException
1982     *             if {@code seq} is {@code null}.
1983     * @throws IndexOutOfBoundsException
1984     *             if the {@code index <= start}, {@code start < 0},
1985     *             {@code index} is greater than the length of {@code seq}, or
1986     *             if {@code start} is equal or greater than the length of
1987     *             {@code seq}.
1988     * @since 1.5
1989     */
1990    public static int codePointBefore(char[] seq, int index, int start) {
1991        if (seq == null) {
1992            throw new NullPointerException();
1993        }
1994        int len = seq.length;
1995        if (index <= start || index > len || start < 0 || start >= len) {
1996            throw new IndexOutOfBoundsException();
1997        }
1998
1999        char low = seq[--index];
2000        if (--index < start) {
2001            return low;
2002        }
2003        char high = seq[index];
2004        if (isSurrogatePair(high, low)) {
2005            return toCodePoint(high, low);
2006        }
2007        return low;
2008    }
2009
2010    /**
2011     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2012     * and copies the value(s) into the char array {@code dst}, starting at
2013     * index {@code dstIndex}.
2014     *
2015     * @param codePoint
2016     *            the Unicode code point to encode.
2017     * @param dst
2018     *            the destination array to copy the encoded value into.
2019     * @param dstIndex
2020     *            the index in {@code dst} from where to start copying.
2021     * @return the number of {@code char} value units copied into {@code dst}.
2022     * @throws IllegalArgumentException
2023     *             if {@code codePoint} is not a valid Unicode code point.
2024     * @throws NullPointerException
2025     *             if {@code dst} is {@code null}.
2026     * @throws IndexOutOfBoundsException
2027     *             if {@code dstIndex} is negative, greater than or equal to
2028     *             {@code dst.length} or equals {@code dst.length - 1} when
2029     *             {@code codePoint} is a
2030     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2031     * @since 1.5
2032     */
2033    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2034        if (!isValidCodePoint(codePoint)) {
2035            throw new IllegalArgumentException();
2036        }
2037        if (dst == null) {
2038            throw new NullPointerException();
2039        }
2040        if (dstIndex < 0 || dstIndex >= dst.length) {
2041            throw new IndexOutOfBoundsException();
2042        }
2043
2044        if (isSupplementaryCodePoint(codePoint)) {
2045            if (dstIndex == dst.length - 1) {
2046                throw new IndexOutOfBoundsException();
2047            }
2048            // See RFC 2781, Section 2.1
2049            // http://www.ietf.org/rfc/rfc2781.txt
2050            int cpPrime = codePoint - 0x10000;
2051            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2052            int low = 0xDC00 | (cpPrime & 0x3FF);
2053            dst[dstIndex] = (char) high;
2054            dst[dstIndex + 1] = (char) low;
2055            return 2;
2056        }
2057
2058        dst[dstIndex] = (char) codePoint;
2059        return 1;
2060    }
2061
2062    /**
2063     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2064     * and returns it as a char array.
2065     *
2066     * @param codePoint
2067     *            the Unicode code point to encode.
2068     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2069     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2070     *         then the returned array contains two characters, otherwise it
2071     *         contains just one character.
2072     * @throws IllegalArgumentException
2073     *             if {@code codePoint} is not a valid Unicode code point.
2074     * @since 1.5
2075     */
2076    public static char[] toChars(int codePoint) {
2077        if (!isValidCodePoint(codePoint)) {
2078            throw new IllegalArgumentException();
2079        }
2080
2081        if (isSupplementaryCodePoint(codePoint)) {
2082            int cpPrime = codePoint - 0x10000;
2083            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2084            int low = 0xDC00 | (cpPrime & 0x3FF);
2085            return new char[] { (char) high, (char) low };
2086        }
2087        return new char[] { (char) codePoint };
2088    }
2089
2090    /**
2091     * Counts the number of Unicode code points in the subsequence of the
2092     * specified character sequence, as delineated by {@code beginIndex} and
2093     * {@code endIndex}. Any surrogate values with missing pair values will be
2094     * counted as one code point.
2095     *
2096     * @param seq
2097     *            the {@code CharSequence} to look through.
2098     * @param beginIndex
2099     *            the inclusive index to begin counting at.
2100     * @param endIndex
2101     *            the exclusive index to stop counting at.
2102     * @return the number of Unicode code points.
2103     * @throws NullPointerException
2104     *             if {@code seq} is {@code null}.
2105     * @throws IndexOutOfBoundsException
2106     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2107     *             if {@code endIndex} is greater than the length of {@code seq}.
2108     * @since 1.5
2109     */
2110    public static int codePointCount(CharSequence seq, int beginIndex,
2111            int endIndex) {
2112        if (seq == null) {
2113            throw new NullPointerException();
2114        }
2115        int len = seq.length();
2116        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2117            throw new IndexOutOfBoundsException();
2118        }
2119
2120        int result = 0;
2121        for (int i = beginIndex; i < endIndex; i++) {
2122            char c = seq.charAt(i);
2123            if (isHighSurrogate(c)) {
2124                if (++i < endIndex) {
2125                    c = seq.charAt(i);
2126                    if (!isLowSurrogate(c)) {
2127                        result++;
2128                    }
2129                }
2130            }
2131            result++;
2132        }
2133        return result;
2134    }
2135
2136    /**
2137     * Counts the number of Unicode code points in the subsequence of the
2138     * specified char array, as delineated by {@code offset} and {@code count}.
2139     * Any surrogate values with missing pair values will be counted as one code
2140     * point.
2141     *
2142     * @param seq
2143     *            the char array to look through
2144     * @param offset
2145     *            the inclusive index to begin counting at.
2146     * @param count
2147     *            the number of {@code char} values to look through in
2148     *            {@code seq}.
2149     * @return the number of Unicode code points.
2150     * @throws NullPointerException
2151     *             if {@code seq} is {@code null}.
2152     * @throws IndexOutOfBoundsException
2153     *             if {@code offset < 0}, {@code count < 0} or if
2154     *             {@code offset + count} is greater than the length of
2155     *             {@code seq}.
2156     * @since 1.5
2157     */
2158    public static int codePointCount(char[] seq, int offset, int count) {
2159        if (seq == null) {
2160            throw new NullPointerException();
2161        }
2162        int len = seq.length;
2163        int endIndex = offset + count;
2164        if (offset < 0 || count < 0 || endIndex > len) {
2165            throw new IndexOutOfBoundsException();
2166        }
2167
2168        int result = 0;
2169        for (int i = offset; i < endIndex; i++) {
2170            char c = seq[i];
2171            if (isHighSurrogate(c)) {
2172                if (++i < endIndex) {
2173                    c = seq[i];
2174                    if (!isLowSurrogate(c)) {
2175                        result++;
2176                    }
2177                }
2178            }
2179            result++;
2180        }
2181        return result;
2182    }
2183
2184    /**
2185     * Determines the index in the specified character sequence that is offset
2186     * {@code codePointOffset} code points from {@code index}.
2187     *
2188     * @param seq
2189     *            the character sequence to find the index in.
2190     * @param index
2191     *            the start index in {@code seq}.
2192     * @param codePointOffset
2193     *            the number of code points to look backwards or forwards; may
2194     *            be a negative or positive value.
2195     * @return the index in {@code seq} that is {@code codePointOffset} code
2196     *         points away from {@code index}.
2197     * @throws NullPointerException
2198     *             if {@code seq} is {@code null}.
2199     * @throws IndexOutOfBoundsException
2200     *             if {@code index < 0}, {@code index} is greater than the
2201     *             length of {@code seq}, or if there are not enough values in
2202     *             {@code seq} to skip {@code codePointOffset} code points
2203     *             forwards or backwards (if {@code codePointOffset} is
2204     *             negative) from {@code index}.
2205     * @since 1.5
2206     */
2207    public static int offsetByCodePoints(CharSequence seq, int index,
2208            int codePointOffset) {
2209        if (seq == null) {
2210            throw new NullPointerException();
2211        }
2212        int len = seq.length();
2213        if (index < 0 || index > len) {
2214            throw new IndexOutOfBoundsException();
2215        }
2216
2217        if (codePointOffset == 0) {
2218            return index;
2219        }
2220
2221        if (codePointOffset > 0) {
2222            int codePoints = codePointOffset;
2223            int i = index;
2224            while (codePoints > 0) {
2225                codePoints--;
2226                if (i >= len) {
2227                    throw new IndexOutOfBoundsException();
2228                }
2229                if (isHighSurrogate(seq.charAt(i))) {
2230                    int next = i + 1;
2231                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2232                        i++;
2233                    }
2234                }
2235                i++;
2236            }
2237            return i;
2238        }
2239
2240        assert codePointOffset < 0;
2241        int codePoints = -codePointOffset;
2242        int i = index;
2243        while (codePoints > 0) {
2244            codePoints--;
2245            i--;
2246            if (i < 0) {
2247                throw new IndexOutOfBoundsException();
2248            }
2249            if (isLowSurrogate(seq.charAt(i))) {
2250                int prev = i - 1;
2251                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2252                    i--;
2253                }
2254            }
2255        }
2256        return i;
2257    }
2258
2259    /**
2260     * Determines the index in a subsequence of the specified character array
2261     * that is offset {@code codePointOffset} code points from {@code index}.
2262     * The subsequence is delineated by {@code start} and {@code count}.
2263     *
2264     * @param seq
2265     *            the character array to find the index in.
2266     * @param start
2267     *            the inclusive index that marks the beginning of the
2268     *            subsequence.
2269     * @param count
2270     *            the number of {@code char} values to include within the
2271     *            subsequence.
2272     * @param index
2273     *            the start index in the subsequence of the char array.
2274     * @param codePointOffset
2275     *            the number of code points to look backwards or forwards; may
2276     *            be a negative or positive value.
2277     * @return the index in {@code seq} that is {@code codePointOffset} code
2278     *         points away from {@code index}.
2279     * @throws NullPointerException
2280     *             if {@code seq} is {@code null}.
2281     * @throws IndexOutOfBoundsException
2282     *             if {@code start < 0}, {@code count < 0},
2283     *             {@code index < start}, {@code index > start + count},
2284     *             {@code start + count} is greater than the length of
2285     *             {@code seq}, or if there are not enough values in
2286     *             {@code seq} to skip {@code codePointOffset} code points
2287     *             forward or backward (if {@code codePointOffset} is
2288     *             negative) from {@code index}.
2289     * @since 1.5
2290     */
2291    public static int offsetByCodePoints(char[] seq, int start, int count,
2292            int index, int codePointOffset) {
2293        if (seq == null) {
2294            throw new NullPointerException();
2295        }
2296        int end = start + count;
2297        if (start < 0 || count < 0 || end > seq.length || index < start
2298                || index > end) {
2299            throw new IndexOutOfBoundsException();
2300        }
2301
2302        if (codePointOffset == 0) {
2303            return index;
2304        }
2305
2306        if (codePointOffset > 0) {
2307            int codePoints = codePointOffset;
2308            int i = index;
2309            while (codePoints > 0) {
2310                codePoints--;
2311                if (i >= end) {
2312                    throw new IndexOutOfBoundsException();
2313                }
2314                if (isHighSurrogate(seq[i])) {
2315                    int next = i + 1;
2316                    if (next < end && isLowSurrogate(seq[next])) {
2317                        i++;
2318                    }
2319                }
2320                i++;
2321            }
2322            return i;
2323        }
2324
2325        assert codePointOffset < 0;
2326        int codePoints = -codePointOffset;
2327        int i = index;
2328        while (codePoints > 0) {
2329            codePoints--;
2330            i--;
2331            if (i < start) {
2332                throw new IndexOutOfBoundsException();
2333            }
2334            if (isLowSurrogate(seq[i])) {
2335                int prev = i - 1;
2336                if (prev >= start && isHighSurrogate(seq[prev])) {
2337                    i--;
2338                }
2339            }
2340        }
2341        return i;
2342    }
2343
2344    /**
2345     * Convenience method to determine the value of the specified character
2346     * {@code c} in the supplied radix. The value of {@code radix} must be
2347     * between MIN_RADIX and MAX_RADIX.
2348     *
2349     * @param c
2350     *            the character to determine the value of.
2351     * @param radix
2352     *            the radix.
2353     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2354     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2355     */
2356    public static int digit(char c, int radix) {
2357        return digit((int) c, radix);
2358    }
2359
2360    /**
2361     * Convenience method to determine the value of the character
2362     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2363     * be between MIN_RADIX and MAX_RADIX.
2364     *
2365     * @param codePoint
2366     *            the character, including supplementary characters.
2367     * @param radix
2368     *            the radix.
2369     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2370     *         {@link #MAX_RADIX} then the value of the character in the radix;
2371     *         -1 otherwise.
2372     */
2373    public static int digit(int codePoint, int radix) {
2374        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2375            return -1;
2376        }
2377        if (codePoint < 128) {
2378            // Optimized for ASCII
2379            int result = -1;
2380            if ('0' <= codePoint && codePoint <= '9') {
2381                result = codePoint - '0';
2382            } else if ('a' <= codePoint && codePoint <= 'z') {
2383                result = 10 + (codePoint - 'a');
2384            } else if ('A' <= codePoint && codePoint <= 'Z') {
2385                result = 10 + (codePoint - 'A');
2386            }
2387            return result < radix ? result : -1;
2388        }
2389        return digitImpl(codePoint, radix);
2390    }
2391
    // Native fallback behind digit(int, int) for code points of 128 and above.
    // digit(int, int) calls this only after checking radix against MIN_RADIX and
    // MAX_RADIX, and returns the native result unchanged.
    private static native int digitImpl(int codePoint, int radix);
2393
2394    /**
2395     * Compares this object with the specified object and indicates if they are
2396     * equal. In order to be equal, {@code object} must be an instance of
2397     * {@code Character} and have the same char value as this object.
2398     *
2399     * @param object
2400     *            the object to compare this double with.
2401     * @return {@code true} if the specified object is equal to this
2402     *         {@code Character}; {@code false} otherwise.
2403     */
2404    @Override
2405    public boolean equals(Object object) {
2406        return (object instanceof Character) && (value == ((Character) object).value);
2407    }
2408
2409    /**
2410     * Returns the character which represents the specified digit in the
2411     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2412     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2413     * smaller than {@code radix}. If any of these conditions does not hold, 0
2414     * is returned.
2415     *
2416     * @param digit
2417     *            the integer value.
2418     * @param radix
2419     *            the radix.
2420     * @return the character which represents the {@code digit} in the
2421     *         {@code radix}.
2422     */
2423    public static char forDigit(int digit, int radix) {
2424        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2425            if (digit >= 0 && digit < radix) {
2426                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2427            }
2428        }
2429        return 0;
2430    }
2431
2432    /**
2433     * Returns the numeric value of the specified Unicode character.
2434     * See {@link #getNumericValue(int)}.
2435     *
2436     * @param c the character
2437     * @return a non-negative numeric integer value if a numeric value for
2438     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2439     *         -2 if the numeric value can not be represented as an integer.
2440     */
2441    public static int getNumericValue(char c) {
2442        return getNumericValue((int) c);
2443    }
2444
2445    /**
2446     * Gets the numeric value of the specified Unicode code point. For example,
2447     * the code point '\u216B' stands for the Roman number XII, which has the
2448     * numeric value 12.
2449     *
2450     * <p>There are two points of divergence between this method and the Unicode
2451     * specification. This method treats the letters a-z (in both upper and lower
2452     * cases, and their full-width variants) as numbers from 10 to 35. The
2453     * Unicode specification also supports the idea of code points with non-integer
2454     * numeric values; this method does not (except to the extent of returning -2
2455     * for such code points).
2456     *
2457     * @param codePoint the code point
2458     * @return a non-negative numeric integer value if a numeric value for
2459     *         {@code codePoint} exists, -1 if there is no numeric value for
2460     *         {@code codePoint}, -2 if the numeric value can not be
2461     *         represented with an integer.
2462     */
2463    public static int getNumericValue(int codePoint) {
2464        // This is both an optimization and papers over differences between Java and ICU.
2465        if (codePoint < 128) {
2466            if (codePoint >= '0' && codePoint <= '9') {
2467                return codePoint - '0';
2468            }
2469            if (codePoint >= 'a' && codePoint <= 'z') {
2470                return codePoint - ('a' - 10);
2471            }
2472            if (codePoint >= 'A' && codePoint <= 'Z') {
2473                return codePoint - ('A' - 10);
2474            }
2475            return -1;
2476        }
2477        // Full-width uppercase A-Z.
2478        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2479            return codePoint - 0xff17;
2480        }
2481        // Full-width lowercase a-z.
2482        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2483            return codePoint - 0xff37;
2484        }
2485        return getNumericValueImpl(codePoint);
2486    }
2487
2488    private static native int getNumericValueImpl(int codePoint);
2489
2490    /**
2491     * Gets the general Unicode category of the specified character.
2492     *
2493     * @param c
2494     *            the character to get the category of.
2495     * @return the Unicode category of {@code c}.
2496     */
2497    public static int getType(char c) {
2498        return getType((int) c);
2499    }
2500
2501    /**
2502     * Gets the general Unicode category of the specified code point.
2503     *
2504     * @param codePoint
2505     *            the Unicode code point to get the category of.
2506     * @return the Unicode category of {@code codePoint}.
2507     */
2508    public static int getType(int codePoint) {
2509        int type = getTypeImpl(codePoint);
2510        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2511        if (type <= Character.FORMAT) {
2512            return type;
2513        }
2514        return (type + 1);
2515    }
2516
2517    private static native int getTypeImpl(int codePoint);
2518
2519    /**
2520     * Gets the Unicode directionality of the specified character.
2521     *
2522     * @param c
2523     *            the character to get the directionality of.
2524     * @return the Unicode directionality of {@code c}.
2525     */
2526    public static byte getDirectionality(char c) {
2527        return getDirectionality((int)c);
2528    }
2529
2530    /**
2531     * Gets the Unicode directionality of the specified character.
2532     *
2533     * @param codePoint
2534     *            the Unicode code point to get the directionality of.
2535     * @return the Unicode directionality of {@code codePoint}.
2536     */
2537    public static byte getDirectionality(int codePoint) {
2538        if (getType(codePoint) == Character.UNASSIGNED) {
2539            return Character.DIRECTIONALITY_UNDEFINED;
2540        }
2541
2542        byte directionality = getDirectionalityImpl(codePoint);
2543        if (directionality == -1) {
2544            return -1;
2545        }
2546        return DIRECTIONALITY[directionality];
2547    }
2548
2549    private static native byte getDirectionalityImpl(int codePoint);
2550
2551    /**
2552     * Indicates whether the specified character is mirrored.
2553     *
2554     * @param c
2555     *            the character to check.
2556     * @return {@code true} if {@code c} is mirrored; {@code false}
2557     *         otherwise.
2558     */
2559    public static boolean isMirrored(char c) {
2560        return isMirrored((int) c);
2561    }
2562
2563    /**
2564     * Indicates whether the specified code point is mirrored.
2565     *
2566     * @param codePoint
2567     *            the code point to check.
2568     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2569     *         otherwise.
2570     */
2571    public static boolean isMirrored(int codePoint) {
2572        return isMirroredImpl(codePoint);
2573    }
2574
2575    private static native boolean isMirroredImpl(int codePoint);
2576
2577    @Override
2578    public int hashCode() {
2579        return value;
2580    }
2581
2582    /**
2583     * Indicates whether the specified character is defined in the Unicode
2584     * specification.
2585     *
2586     * @param c
2587     *            the character to check.
2588     * @return {@code true} if the general Unicode category of the character is
2589     *         not {@code UNASSIGNED}; {@code false} otherwise.
2590     */
2591    public static boolean isDefined(char c) {
2592        return isDefinedImpl(c);
2593    }
2594
2595    /**
2596     * Indicates whether the specified code point is defined in the Unicode
2597     * specification.
2598     *
2599     * @param codePoint
2600     *            the code point to check.
2601     * @return {@code true} if the general Unicode category of the code point is
2602     *         not {@code UNASSIGNED}; {@code false} otherwise.
2603     */
2604    public static boolean isDefined(int codePoint) {
2605        return isDefinedImpl(codePoint);
2606    }
2607
2608    private static native boolean isDefinedImpl(int codePoint);
2609
2610    /**
2611     * Indicates whether the specified character is a digit.
2612     *
2613     * @param c
2614     *            the character to check.
2615     * @return {@code true} if {@code c} is a digit; {@code false}
2616     *         otherwise.
2617     */
2618    public static boolean isDigit(char c) {
2619        return isDigit((int) c);
2620    }
2621
2622    /**
2623     * Indicates whether the specified code point is a digit.
2624     *
2625     * @param codePoint
2626     *            the code point to check.
2627     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2628     *         otherwise.
2629     */
2630    public static boolean isDigit(int codePoint) {
2631        // Optimized case for ASCII
2632        if ('0' <= codePoint && codePoint <= '9') {
2633            return true;
2634        }
2635        if (codePoint < 1632) {
2636            return false;
2637        }
2638        return isDigitImpl(codePoint);
2639    }
2640
2641    private static native boolean isDigitImpl(int codePoint);
2642
2643    /**
2644     * Indicates whether the specified character is ignorable in a Java or
2645     * Unicode identifier.
2646     *
2647     * @param c
2648     *            the character to check.
2649     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2650     */
2651    public static boolean isIdentifierIgnorable(char c) {
2652        return isIdentifierIgnorable((int) c);
2653    }
2654
2655    /**
2656     * Indicates whether the specified code point is ignorable in a Java or
2657     * Unicode identifier.
2658     *
2659     * @param codePoint
2660     *            the code point to check.
2661     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2662     *         otherwise.
2663     */
2664    public static boolean isIdentifierIgnorable(int codePoint) {
2665        // This is both an optimization and papers over differences between Java and ICU.
2666        if (codePoint < 0x600) {
2667            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2668                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2669        }
2670        return isIdentifierIgnorableImpl(codePoint);
2671    }
2672
2673    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2674
2675    /**
2676     * Indicates whether the specified character is an ISO control character.
2677     *
2678     * @param c
2679     *            the character to check.
2680     * @return {@code true} if {@code c} is an ISO control character;
2681     *         {@code false} otherwise.
2682     */
2683    public static boolean isISOControl(char c) {
2684        return isISOControl((int) c);
2685    }
2686
2687    /**
2688     * Indicates whether the specified code point is an ISO control character.
2689     *
2690     * @param c
2691     *            the code point to check.
2692     * @return {@code true} if {@code c} is an ISO control character;
2693     *         {@code false} otherwise.
2694     */
2695    public static boolean isISOControl(int c) {
2696        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2697    }
2698
2699    /**
2700     * Indicates whether the specified character is a valid part of a Java
2701     * identifier other than the first character.
2702     *
2703     * @param c
2704     *            the character to check.
2705     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2706     *         {@code false} otherwise.
2707     */
2708    public static boolean isJavaIdentifierPart(char c) {
2709        // BEGIN android-changed
2710        return isJavaIdentifierPart((int) c);
2711        // END android-changed
2712    }
2713
2714    /**
2715     * Indicates whether the specified code point is a valid part of a Java
2716     * identifier other than the first character.
2717     *
2718     * @param codePoint
2719     *            the code point to check.
2720     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2721     *         {@code false} otherwise.
2722     */
2723    public static boolean isJavaIdentifierPart(int codePoint) {
2724        // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
2725        // Optimized case for ASCII
2726        if (codePoint < 64) {
2727            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2728        } else if (codePoint < 128) {
2729            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2730        }
2731        int type = getType(codePoint);
2732        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2733                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2734                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2735                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2736                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2737                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2738        // END android-changed
2739    }
2740
2741    /**
2742     * Indicates whether the specified character is a valid first character for
2743     * a Java identifier.
2744     *
2745     * @param c
2746     *            the character to check.
2747     * @return {@code true} if {@code c} is a valid first character of a Java
2748     *         identifier; {@code false} otherwise.
2749     */
2750    public static boolean isJavaIdentifierStart(char c) {
2751        // BEGIN android-changed
2752        return isJavaIdentifierStart((int) c);
2753        // END android-changed
2754    }
2755
2756    /**
2757     * Indicates whether the specified code point is a valid first character for
2758     * a Java identifier.
2759     *
2760     * @param codePoint
2761     *            the code point to check.
2762     * @return {@code true} if {@code codePoint} is a valid start of a Java
2763     *         identifier; {@code false} otherwise.
2764     */
2765    public static boolean isJavaIdentifierStart(int codePoint) {
2766        // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
2767        // Optimized case for ASCII
2768        if (codePoint < 64) {
2769            return (codePoint == '$'); // There's only one character in this range.
2770        } else if (codePoint < 128) {
2771            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2772        }
2773        int type = getType(codePoint);
2774        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2775                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2776        // END android-changed
2777    }
2778
2779    /**
2780     * Indicates whether the specified character is a Java letter.
2781     *
2782     * @param c
2783     *            the character to check.
2784     * @return {@code true} if {@code c} is a Java letter; {@code false}
2785     *         otherwise.
2786     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2787     */
2788    @Deprecated
2789    public static boolean isJavaLetter(char c) {
2790        return isJavaIdentifierStart(c);
2791    }
2792
2793    /**
2794     * Indicates whether the specified character is a Java letter or digit
2795     * character.
2796     *
2797     * @param c
2798     *            the character to check.
2799     * @return {@code true} if {@code c} is a Java letter or digit;
2800     *         {@code false} otherwise.
2801     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2802     */
2803    @Deprecated
2804    public static boolean isJavaLetterOrDigit(char c) {
2805        return isJavaIdentifierPart(c);
2806    }
2807
2808    /**
2809     * Indicates whether the specified character is a letter.
2810     *
2811     * @param c
2812     *            the character to check.
2813     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2814     */
2815    public static boolean isLetter(char c) {
2816        return isLetter((int) c);
2817    }
2818
2819    /**
2820     * Indicates whether the specified code point is a letter.
2821     *
2822     * @param codePoint
2823     *            the code point to check.
2824     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2825     *         otherwise.
2826     */
2827    public static boolean isLetter(int codePoint) {
2828        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2829            return true;
2830        }
2831        if (codePoint < 128) {
2832            return false;
2833        }
2834        return isLetterImpl(codePoint);
2835    }
2836
2837    private static native boolean isLetterImpl(int codePoint);
2838
2839    /**
2840     * Indicates whether the specified character is a letter or a digit.
2841     *
2842     * @param c
2843     *            the character to check.
2844     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2845     *         otherwise.
2846     */
2847    public static boolean isLetterOrDigit(char c) {
2848        return isLetterOrDigit((int) c);
2849    }
2850
2851    /**
2852     * Indicates whether the specified code point is a letter or a digit.
2853     *
2854     * @param codePoint
2855     *            the code point to check.
2856     * @return {@code true} if {@code codePoint} is a letter or a digit;
2857     *         {@code false} otherwise.
2858     */
2859    public static boolean isLetterOrDigit(int codePoint) {
2860        // Optimized case for ASCII
2861        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2862            return true;
2863        }
2864        if ('0' <= codePoint && codePoint <= '9') {
2865            return true;
2866        }
2867        if (codePoint < 128) {
2868            return false;
2869        }
2870        return isLetterOrDigitImpl(codePoint);
2871    }
2872
2873    private static native boolean isLetterOrDigitImpl(int codePoint);
2874
2875    /**
2876     * Indicates whether the specified character is a lower case letter.
2877     *
2878     * @param c
2879     *            the character to check.
2880     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2881     *         otherwise.
2882     */
2883    public static boolean isLowerCase(char c) {
2884        return isLowerCase((int) c);
2885    }
2886
2887    /**
2888     * Indicates whether the specified code point is a lower case letter.
2889     *
2890     * @param codePoint
2891     *            the code point to check.
2892     * @return {@code true} if {@code codePoint} is a lower case letter;
2893     *         {@code false} otherwise.
2894     */
2895    public static boolean isLowerCase(int codePoint) {
2896        // Optimized case for ASCII
2897        if ('a' <= codePoint && codePoint <= 'z') {
2898            return true;
2899        }
2900        if (codePoint < 128) {
2901            return false;
2902        }
2903        return isLowerCaseImpl(codePoint);
2904    }
2905
2906    private static native boolean isLowerCaseImpl(int codePoint);
2907
2908    /**
2909     * Indicates whether the specified character is a Java space.
2910     *
2911     * @param c
2912     *            the character to check.
2913     * @return {@code true} if {@code c} is a Java space; {@code false}
2914     *         otherwise.
2915     * @deprecated Use {@link #isWhitespace(char)}
2916     */
2917    @Deprecated
2918    public static boolean isSpace(char c) {
2919        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2920    }
2921
2922    /**
2923     * Indicates whether the specified character is a Unicode space character.
2924     * That is, if it is a member of one of the Unicode categories Space
2925     * Separator, Line Separator, or Paragraph Separator.
2926     *
2927     * @param c
2928     *            the character to check.
2929     * @return {@code true} if {@code c} is a Unicode space character,
2930     *         {@code false} otherwise.
2931     */
2932    public static boolean isSpaceChar(char c) {
2933        return isSpaceChar((int) c);
2934    }
2935
2936    /**
2937     * Indicates whether the specified code point is a Unicode space character.
2938     * That is, if it is a member of one of the Unicode categories Space
2939     * Separator, Line Separator, or Paragraph Separator.
2940     *
2941     * @param codePoint
2942     *            the code point to check.
2943     * @return {@code true} if {@code codePoint} is a Unicode space character,
2944     *         {@code false} otherwise.
2945     */
2946    public static boolean isSpaceChar(int codePoint) {
2947        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
2948            return true;
2949        }
2950        if (codePoint < 0x2000) {
2951            return false;
2952        }
2953        if (codePoint <= 0xffff) {
2954            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
2955                    codePoint == 0x202f || codePoint == 0x3000;
2956        }
2957        return isSpaceCharImpl(codePoint);
2958    }
2959
2960    private static native boolean isSpaceCharImpl(int codePoint);
2961
2962    /**
2963     * Indicates whether the specified character is a titlecase character.
2964     *
2965     * @param c
2966     *            the character to check.
2967     * @return {@code true} if {@code c} is a titlecase character, {@code false}
2968     *         otherwise.
2969     */
2970    public static boolean isTitleCase(char c) {
2971        return isTitleCaseImpl(c);
2972    }
2973
2974    /**
2975     * Indicates whether the specified code point is a titlecase character.
2976     *
2977     * @param codePoint
2978     *            the code point to check.
2979     * @return {@code true} if {@code codePoint} is a titlecase character,
2980     *         {@code false} otherwise.
2981     */
2982    public static boolean isTitleCase(int codePoint) {
2983        return isTitleCaseImpl(codePoint);
2984    }
2985
2986    private static native boolean isTitleCaseImpl(int codePoint);
2987
2988    /**
2989     * Indicates whether the specified character is valid as part of a Unicode
2990     * identifier other than the first character.
2991     *
2992     * @param c
2993     *            the character to check.
2994     * @return {@code true} if {@code c} is valid as part of a Unicode
2995     *         identifier; {@code false} otherwise.
2996     */
2997    public static boolean isUnicodeIdentifierPart(char c) {
2998        return isUnicodeIdentifierPartImpl(c);
2999    }
3000
3001    /**
3002     * Indicates whether the specified code point is valid as part of a Unicode
3003     * identifier other than the first character.
3004     *
3005     * @param codePoint
3006     *            the code point to check.
3007     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3008     *         identifier; {@code false} otherwise.
3009     */
3010    public static boolean isUnicodeIdentifierPart(int codePoint) {
3011        return isUnicodeIdentifierPartImpl(codePoint);
3012    }
3013
3014    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3015
3016    /**
3017     * Indicates whether the specified character is a valid initial character
3018     * for a Unicode identifier.
3019     *
3020     * @param c
3021     *            the character to check.
3022     * @return {@code true} if {@code c} is a valid first character for a
3023     *         Unicode identifier; {@code false} otherwise.
3024     */
3025    public static boolean isUnicodeIdentifierStart(char c) {
3026        return isUnicodeIdentifierStartImpl(c);
3027    }
3028
3029    /**
3030     * Indicates whether the specified code point is a valid initial character
3031     * for a Unicode identifier.
3032     *
3033     * @param codePoint
3034     *            the code point to check.
3035     * @return {@code true} if {@code codePoint} is a valid first character for
3036     *         a Unicode identifier; {@code false} otherwise.
3037     */
3038    public static boolean isUnicodeIdentifierStart(int codePoint) {
3039        return isUnicodeIdentifierStartImpl(codePoint);
3040    }
3041
3042    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3043
3044    /**
3045     * Indicates whether the specified character is an upper case letter.
3046     *
3047     * @param c
3048     *            the character to check.
3049     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3050     *         otherwise.
3051     */
3052    public static boolean isUpperCase(char c) {
3053        return isUpperCase((int) c);
3054    }
3055
3056    /**
3057     * Indicates whether the specified code point is an upper case letter.
3058     *
3059     * @param codePoint
3060     *            the code point to check.
3061     * @return {@code true} if {@code codePoint} is a upper case letter;
3062     *         {@code false} otherwise.
3063     */
3064    public static boolean isUpperCase(int codePoint) {
3065        // Optimized case for ASCII
3066        if ('A' <= codePoint && codePoint <= 'Z') {
3067            return true;
3068        }
3069        if (codePoint < 128) {
3070            return false;
3071        }
3072        return isUpperCaseImpl(codePoint);
3073    }
3074
3075    private static native boolean isUpperCaseImpl(int codePoint);
3076
3077    /**
3078     * Indicates whether the specified character is a whitespace character in
3079     * Java.
3080     *
3081     * @param c
3082     *            the character to check.
3083     * @return {@code true} if the supplied {@code c} is a whitespace character
3084     *         in Java; {@code false} otherwise.
3085     */
3086    public static boolean isWhitespace(char c) {
3087        return isWhitespace((int) c);
3088    }
3089
3090    /**
3091     * Indicates whether the specified code point is a whitespace character in
3092     * Java.
3093     *
3094     * @param codePoint
3095     *            the code point to check.
3096     * @return {@code true} if the supplied {@code c} is a whitespace character
3097     *         in Java; {@code false} otherwise.
3098     */
3099    public static boolean isWhitespace(int codePoint) {
3100        // This is both an optimization and papers over differences between Java and ICU.
3101        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3102            return true;
3103        }
3104        if (codePoint == 0x1680) {
3105            return true;
3106        }
3107        if (codePoint < 0x2000 || codePoint == 0x2007) {
3108            return false;
3109        }
3110        if (codePoint <= 0xffff) {
3111            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3112                    codePoint == 0x3000;
3113        }
3114        return isWhitespaceImpl(codePoint);
3115    }
3116
3117    private static native boolean isWhitespaceImpl(int codePoint);
3118
3119    /**
3120     * Reverses the order of the first and second byte in the specified
3121     * character.
3122     *
3123     * @param c
3124     *            the character to reverse.
3125     * @return the character with reordered bytes.
3126     */
3127    public static char reverseBytes(char c) {
3128        return (char)((c<<8) | (c>>8));
3129    }
3130
3131    /**
3132     * Returns the lower case equivalent for the specified character if the
3133     * character is an upper case letter. Otherwise, the specified character is
3134     * returned unchanged.
3135     *
3136     * @param c
3137     *            the character
3138     * @return if {@code c} is an upper case character then its lower case
3139     *         counterpart, otherwise just {@code c}.
3140     */
3141    public static char toLowerCase(char c) {
3142        return (char) toLowerCase((int) c);
3143    }
3144
3145    /**
3146     * Returns the lower case equivalent for the specified code point if it is
3147     * an upper case letter. Otherwise, the specified code point is returned
3148     * unchanged.
3149     *
3150     * @param codePoint
3151     *            the code point to check.
3152     * @return if {@code codePoint} is an upper case character then its lower
3153     *         case counterpart, otherwise just {@code codePoint}.
3154     */
3155    public static int toLowerCase(int codePoint) {
3156        // Optimized case for ASCII
3157        if ('A' <= codePoint && codePoint <= 'Z') {
3158            return (char) (codePoint + ('a' - 'A'));
3159        }
3160        if (codePoint < 192) {
3161            return codePoint;
3162        }
3163        return toLowerCaseImpl(codePoint);
3164    }
3165
3166    private static native int toLowerCaseImpl(int codePoint);
3167
3168    @Override
3169    public String toString() {
3170        return String.valueOf(value);
3171    }
3172
3173    /**
3174     * Converts the specified character to its string representation.
3175     *
3176     * @param value
3177     *            the character to convert.
3178     * @return the character converted to a string.
3179     */
3180    public static String toString(char value) {
3181        return String.valueOf(value);
3182    }
3183
3184    /**
3185     * Returns the title case equivalent for the specified character if it
3186     * exists. Otherwise, the specified character is returned unchanged.
3187     *
3188     * @param c
3189     *            the character to convert.
3190     * @return the title case equivalent of {@code c} if it exists, otherwise
3191     *         {@code c}.
3192     */
3193    public static char toTitleCase(char c) {
3194        return (char) toTitleCaseImpl(c);
3195    }
3196
3197    /**
3198     * Returns the title case equivalent for the specified code point if it
3199     * exists. Otherwise, the specified code point is returned unchanged.
3200     *
3201     * @param codePoint
3202     *            the code point to convert.
3203     * @return the title case equivalent of {@code codePoint} if it exists,
3204     *         otherwise {@code codePoint}.
3205     */
3206    public static int toTitleCase(int codePoint) {
3207        return toTitleCaseImpl(codePoint);
3208    }
3209
3210    private static native int toTitleCaseImpl(int codePoint);
3211
3212    /**
3213     * Returns the upper case equivalent for the specified character if the
3214     * character is a lower case letter. Otherwise, the specified character is
3215     * returned unchanged.
3216     *
3217     * @param c
3218     *            the character to convert.
3219     * @return if {@code c} is a lower case character then its upper case
3220     *         counterpart, otherwise just {@code c}.
3221     */
3222    public static char toUpperCase(char c) {
3223        return (char) toUpperCase((int) c);
3224    }
3225
3226    /**
3227     * Returns the upper case equivalent for the specified code point if the
3228     * code point is a lower case letter. Otherwise, the specified code point is
3229     * returned unchanged.
3230     *
3231     * @param codePoint
3232     *            the code point to convert.
3233     * @return if {@code codePoint} is a lower case character then its upper
3234     *         case counterpart, otherwise just {@code codePoint}.
3235     */
3236    public static int toUpperCase(int codePoint) {
3237        // Optimized case for ASCII
3238        if ('a' <= codePoint && codePoint <= 'z') {
3239            return (char) (codePoint - ('a' - 'A'));
3240        }
3241        if (codePoint < 181) {
3242            return codePoint;
3243        }
3244        return toUpperCaseImpl(codePoint);
3245    }
3246
3247    private static native int toUpperCaseImpl(int codePoint);
3248}
3249