Character.java revision e26ba79900d471d02d656f686926918ef7dc751f
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21import java.util.Arrays;
22
23/**
24 * The wrapper for the primitive type {@code char}. This class also provides a
25 * number of utility methods for working with characters.
26 *
27 * <p>Character data is kept up to date as Unicode evolves.
28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29 * the {@code Locale} documentation for details of the Unicode versions implemented by current
30 * and historical Android releases.
31 *
32 * <p>The Unicode specification, character tables, and other information are available at
33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34 *
35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39 * encoding and {@code char} pairs are used to represent code points in the
40 * supplementary range. A pair of {@code char} values that represent a
41 * supplementary character are made up of a <i>high surrogate</i> with a value
42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
43 * 0xDC00 to 0xDFFF.
44 * <p>
45 * On the Java platform a {@code char} value represents either a single BMP code
46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47 * is used to represent all Unicode code points.
48 *
49 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51 * grouped semantically to provide a convenient overview. This table is also useful in
52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53 * <span class="datatable">
54 * <style type="text/css">
55 * .datatable td { padding-right: 20px; }
56 * </style>
57 * <p><table>
58 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63 * <tr> <td><br></td> </tr>
64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69 * <tr> <td><br></td> </tr>
70 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73 * <tr> <td><br></td> </tr>
74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77 * <tr> <td><br></td> </tr>
78 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85 * <tr> <td><br></td> </tr>
86 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90 * <tr> <td><br></td> </tr>
91 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94 * </table>
95 * </span>
96 *
97 * @since 1.0
98 */
99public final class Character implements Serializable, Comparable<Character> {
100    private static final long serialVersionUID = 3786198910865385080L;
101
102    private final char value;
103
104    /**
105     * The minimum {@code Character} value.
106     */
107    public static final char MIN_VALUE = '\u0000';
108
109    /**
110     * The maximum {@code Character} value.
111     */
112    public static final char MAX_VALUE = '\uffff';
113
114    /**
115     * The minimum radix used for conversions between characters and integers.
116     */
117    public static final int MIN_RADIX = 2;
118
119    /**
120     * The maximum radix used for conversions between characters and integers.
121     */
122    public static final int MAX_RADIX = 36;
123
124    /**
125     * The {@link Class} object that represents the primitive type {@code char}.
126     */
127    @SuppressWarnings("unchecked")
128    public static final Class<Character> TYPE
129            = (Class<Character>) char[].class.getComponentType();
130    // Note: Character.TYPE can't be set to "char.class", since *that* is
131    // defined to be "java.lang.Character.TYPE";
132
133    /**
134     * Unicode category constant Cn.
135     */
136    public static final byte UNASSIGNED = 0;
137
138    /**
139     * Unicode category constant Lu.
140     */
141    public static final byte UPPERCASE_LETTER = 1;
142
143    /**
144     * Unicode category constant Ll.
145     */
146    public static final byte LOWERCASE_LETTER = 2;
147
148    /**
149     * Unicode category constant Lt.
150     */
151    public static final byte TITLECASE_LETTER = 3;
152
153    /**
154     * Unicode category constant Lm.
155     */
156    public static final byte MODIFIER_LETTER = 4;
157
158    /**
159     * Unicode category constant Lo.
160     */
161    public static final byte OTHER_LETTER = 5;
162
163    /**
164     * Unicode category constant Mn.
165     */
166    public static final byte NON_SPACING_MARK = 6;
167
168    /**
169     * Unicode category constant Me.
170     */
171    public static final byte ENCLOSING_MARK = 7;
172
173    /**
174     * Unicode category constant Mc.
175     */
176    public static final byte COMBINING_SPACING_MARK = 8;
177
178    /**
179     * Unicode category constant Nd.
180     */
181    public static final byte DECIMAL_DIGIT_NUMBER = 9;
182
183    /**
184     * Unicode category constant Nl.
185     */
186    public static final byte LETTER_NUMBER = 10;
187
188    /**
189     * Unicode category constant No.
190     */
191    public static final byte OTHER_NUMBER = 11;
192
193    /**
194     * Unicode category constant Zs.
195     */
196    public static final byte SPACE_SEPARATOR = 12;
197
198    /**
199     * Unicode category constant Zl.
200     */
201    public static final byte LINE_SEPARATOR = 13;
202
203    /**
204     * Unicode category constant Zp.
205     */
206    public static final byte PARAGRAPH_SEPARATOR = 14;
207
208    /**
209     * Unicode category constant Cc.
210     */
211    public static final byte CONTROL = 15;
212
213    /**
214     * Unicode category constant Cf.
215     */
216    public static final byte FORMAT = 16;
217
218    /**
219     * Unicode category constant Co.
220     */
221    public static final byte PRIVATE_USE = 18;
222
223    /**
224     * Unicode category constant Cs.
225     */
226    public static final byte SURROGATE = 19;
227
228    /**
229     * Unicode category constant Pd.
230     */
231    public static final byte DASH_PUNCTUATION = 20;
232
233    /**
234     * Unicode category constant Ps.
235     */
236    public static final byte START_PUNCTUATION = 21;
237
238    /**
239     * Unicode category constant Pe.
240     */
241    public static final byte END_PUNCTUATION = 22;
242
243    /**
244     * Unicode category constant Pc.
245     */
246    public static final byte CONNECTOR_PUNCTUATION = 23;
247
248    /**
249     * Unicode category constant Po.
250     */
251    public static final byte OTHER_PUNCTUATION = 24;
252
253    /**
254     * Unicode category constant Sm.
255     */
256    public static final byte MATH_SYMBOL = 25;
257
258    /**
259     * Unicode category constant Sc.
260     */
261    public static final byte CURRENCY_SYMBOL = 26;
262
263    /**
264     * Unicode category constant Sk.
265     */
266    public static final byte MODIFIER_SYMBOL = 27;
267
268    /**
269     * Unicode category constant So.
270     */
271    public static final byte OTHER_SYMBOL = 28;
272
273    /**
274     * Unicode category constant Pi.
275     *
276     * @since 1.4
277     */
278    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
279
280    /**
281     * Unicode category constant Pf.
282     *
283     * @since 1.4
284     */
285    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
286
287    /**
288     * Unicode bidirectional constant.
289     *
290     * @since 1.4
291     */
292    public static final byte DIRECTIONALITY_UNDEFINED = -1;
293
294    /**
295     * Unicode bidirectional constant L.
296     *
297     * @since 1.4
298     */
299    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
300
301    /**
302     * Unicode bidirectional constant R.
303     *
304     * @since 1.4
305     */
306    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
307
308    /**
309     * Unicode bidirectional constant AL.
310     *
311     * @since 1.4
312     */
313    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
314
315    /**
316     * Unicode bidirectional constant EN.
317     *
318     * @since 1.4
319     */
320    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
321
322    /**
323     * Unicode bidirectional constant ES.
324     *
325     * @since 1.4
326     */
327    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
328
329    /**
330     * Unicode bidirectional constant ET.
331     *
332     * @since 1.4
333     */
334    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
335
336    /**
337     * Unicode bidirectional constant AN.
338     *
339     * @since 1.4
340     */
341    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
342
343    /**
344     * Unicode bidirectional constant CS.
345     *
346     * @since 1.4
347     */
348    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
349
350    /**
351     * Unicode bidirectional constant NSM.
352     *
353     * @since 1.4
354     */
355    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
356
357    /**
358     * Unicode bidirectional constant BN.
359     *
360     * @since 1.4
361     */
362    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
363
364    /**
365     * Unicode bidirectional constant B.
366     *
367     * @since 1.4
368     */
369    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
370
371    /**
372     * Unicode bidirectional constant S.
373     *
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
377
378    /**
379     * Unicode bidirectional constant WS.
380     *
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_WHITESPACE = 12;
384
385    /**
386     * Unicode bidirectional constant ON.
387     *
388     * @since 1.4
389     */
390    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
391
392    /**
393     * Unicode bidirectional constant LRE.
394     *
395     * @since 1.4
396     */
397    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
398
399    /**
400     * Unicode bidirectional constant LRO.
401     *
402     * @since 1.4
403     */
404    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
405
406    /**
407     * Unicode bidirectional constant RLE.
408     *
409     * @since 1.4
410     */
411    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
412
413    /**
414     * Unicode bidirectional constant RLO.
415     *
416     * @since 1.4
417     */
418    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
419
420    /**
421     * Unicode bidirectional constant PDF.
422     *
423     * @since 1.4
424     */
425    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
426
427    /**
428     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
429     * encoding, {@code '\uD800'}.
430     *
431     * @since 1.5
432     */
433    public static final char MIN_HIGH_SURROGATE = '\uD800';
434
435    /**
436     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
437     * encoding, {@code '\uDBFF'}.
438     *
439     * @since 1.5
440     */
441    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
442
443    /**
444     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
445     * encoding, {@code '\uDC00'}.
446     *
447     * @since 1.5
448     */
449    public static final char MIN_LOW_SURROGATE = '\uDC00';
450
451    /**
452     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
453     * encoding, {@code '\uDFFF'}.
454     *
455     * @since 1.5
456     */
457    public static final char MAX_LOW_SURROGATE = '\uDFFF';
458
459    /**
460     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
461     *
462     * @since 1.5
463     */
464    public static final char MIN_SURROGATE = '\uD800';
465
466    /**
467     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
468     *
469     * @since 1.5
470     */
471    public static final char MAX_SURROGATE = '\uDFFF';
472
473    /**
474     * The minimum value of a supplementary code point, {@code U+010000}.
475     *
476     * @since 1.5
477     */
478    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
479
480    /**
481     * The minimum code point value, {@code U+0000}.
482     *
483     * @since 1.5
484     */
485    public static final int MIN_CODE_POINT = 0x000000;
486
487    /**
488     * The maximum code point value, {@code U+10FFFF}.
489     *
490     * @since 1.5
491     */
492    public static final int MAX_CODE_POINT = 0x10FFFF;
493
494    /**
495     * The number of bits required to represent a {@code Character} value
496     * in unsigned form.
497     *
498     * @since 1.5
499     */
500    public static final int SIZE = 16;
501
502    private static final byte[] DIRECTIONALITY = new byte[] {
503            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
504            DIRECTIONALITY_EUROPEAN_NUMBER,
505            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
506            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
507            DIRECTIONALITY_ARABIC_NUMBER,
508            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
509            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
510            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
511            DIRECTIONALITY_OTHER_NEUTRALS,
512            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
513            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
514            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
515            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
516            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
517            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
518            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
519
520    /*
521     * Represents a subset of the Unicode character set.
522     */
523    public static class Subset {
524        String name;
525
526        /**
527         * Constructs a new {@code Subset}.
528         *
529         * @param string
530         *            this subset's name.
531         */
532        protected Subset(String string) {
533            if (string == null) {
534                throw new NullPointerException();
535            }
536            name = string;
537        }
538
539        /**
540         * Compares this character subset with the specified object. Uses
541         * {@link java.lang.Object#equals(Object)} to do the comparison.
542         *
543         * @param object
544         *            the object to compare this character subset with.
545         * @return {@code true} if {@code object} is this subset, that is, if
546         *         {@code object == this}; {@code false} otherwise.
547         */
548        @Override
549        public final boolean equals(Object object) {
550            return super.equals(object);
551        }
552
553        /**
554         * Returns the integer hash code for this character subset.
555         *
556         * @return this subset's hash code, which is the hash code computed by
557         *         {@link java.lang.Object#hashCode()}.
558         */
559        @Override
560        public final int hashCode() {
561            return super.hashCode();
562        }
563
564        /**
565         * Returns the string representation of this subset.
566         *
567         * @return this subset's name.
568         */
569        @Override
570        public final String toString() {
571            return name;
572        }
573    }
574
575    /**
576     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
577     * specification.
578     *
579     * @since 1.2
580     */
581    public static final class UnicodeBlock extends Subset {
582        /**
583         * The &quot;Surrogates Area&quot; Unicode Block.
584         *
585         * @deprecated As of Java 5, this block has been replaced by
586         *             {@link #HIGH_SURROGATES},
587         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
588         *             {@link #LOW_SURROGATES}.
589         */
590        @Deprecated
591        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
592        /**
593         * The &quot;Basic Latin&quot; Unicode Block.
594         *
595         * @since 1.2
596         */
597        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
598        /**
599         * The &quot;Latin-1 Supplement&quot; Unicode Block.
600         *
601         * @since 1.2
602         */
603        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
604        /**
605         * The &quot;Latin Extended-A&quot; Unicode Block.
606         *
607         * @since 1.2
608         */
609        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
610        /**
611         * The &quot;Latin Extended-B&quot; Unicode Block.
612         *
613         * @since 1.2
614         */
615        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
616        /**
617         * The &quot;IPA Extensions&quot; Unicode Block.
618         *
619         * @since 1.2
620         */
621        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
622        /**
623         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
624         *
625         * @since 1.2
626         */
627        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
628        /**
629         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
630         *
631         * @since 1.2
632         */
633        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
634        /**
635         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
636         * to as &quot;Greek&quot;.
637         *
638         * @since 1.2
639         */
640        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
641        /**
642         * The &quot;Cyrillic&quot; Unicode Block.
643         *
644         * @since 1.2
645         */
646        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
647        /**
648         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
649         * referred to as &quot;Cyrillic Supplementary&quot;.
650         *
651         * @since 1.5
652         */
653        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
654        /**
655         * The &quot;Armenian&quot; Unicode Block.
656         *
657         * @since 1.2
658         */
659        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
660        /**
661         * The &quot;Hebrew&quot; Unicode Block.
662         *
663         * @since 1.2
664         */
665        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
666        /**
667         * The &quot;Arabic&quot; Unicode Block.
668         *
669         * @since 1.2
670         */
671        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
672        /**
673         * The &quot;Syriac&quot; Unicode Block.
674         *
675         * @since 1.4
676         */
677        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
678        /**
679         * The &quot;Thaana&quot; Unicode Block.
680         *
681         * @since 1.4
682         */
683        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
684        /**
685         * The &quot;Devanagari&quot; Unicode Block.
686         *
687         * @since 1.2
688         */
689        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
690        /**
691         * The &quot;Bengali&quot; Unicode Block.
692         *
693         * @since 1.2
694         */
695        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
696        /**
697         * The &quot;Gurmukhi&quot; Unicode Block.
698         *
699         * @since 1.2
700         */
701        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
702        /**
703         * The &quot;Gujarati&quot; Unicode Block.
704         *
705         * @since 1.2
706         */
707        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
708        /**
709         * The &quot;Oriya&quot; Unicode Block.
710         *
711         * @since 1.2
712         */
713        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
714        /**
715         * The &quot;Tamil&quot; Unicode Block.
716         *
717         * @since 1.2
718         */
719        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
720        /**
721         * The &quot;Telugu&quot; Unicode Block.
722         *
723         * @since 1.2
724         */
725        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
726        /**
727         * The &quot;Kannada&quot; Unicode Block.
728         *
729         * @since 1.2
730         */
731        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
732        /**
733         * The &quot;Malayalam&quot; Unicode Block.
734         *
735         * @since 1.2
736         */
737        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
738        /**
739         * The &quot;Sinhala&quot; Unicode Block.
740         *
741         * @since 1.4
742         */
743        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
744        /**
745         * The &quot;Thai&quot; Unicode Block.
746         *
747         * @since 1.2
748         */
749        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
750        /**
751         * The &quot;Lao&quot; Unicode Block.
752         *
753         * @since 1.2
754         */
755        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
756        /**
757         * The &quot;Tibetan&quot; Unicode Block.
758         *
759         * @since 1.2
760         */
761        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
762        /**
763         * The &quot;Myanmar&quot; Unicode Block.
764         *
765         * @since 1.4
766         */
767        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
768        /**
769         * The &quot;Georgian&quot; Unicode Block.
770         *
771         * @since 1.2
772         */
773        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
774        /**
775         * The &quot;Hangul Jamo&quot; Unicode Block.
776         *
777         * @since 1.2
778         */
779        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
780        /**
781         * The &quot;Ethiopic&quot; Unicode Block.
782         *
783         * @since 1.4
784         */
785        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
786        /**
787         * The &quot;Cherokee&quot; Unicode Block.
788         *
789         * @since 1.4
790         */
791        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
792        /**
793         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
794         *
795         * @since 1.4
796         */
797        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
798        /**
799         * The &quot;Ogham&quot; Unicode Block.
800         *
801         * @since 1.4
802         */
803        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
804        /**
805         * The &quot;Runic&quot; Unicode Block.
806         *
807         * @since 1.4
808         */
809        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
810        /**
811         * The &quot;Tagalog&quot; Unicode Block.
812         *
813         * @since 1.5
814         */
815        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
816        /**
817         * The &quot;Hanunoo&quot; Unicode Block.
818         *
819         * @since 1.5
820         */
821        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
822        /**
823         * The &quot;Buhid&quot; Unicode Block.
824         *
825         * @since 1.5
826         */
827        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
828        /**
829         * The &quot;Tagbanwa&quot; Unicode Block.
830         *
831         * @since 1.5
832         */
833        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
834        /**
835         * The &quot;Khmer&quot; Unicode Block.
836         *
837         * @since 1.4
838         */
839        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
840        /**
841         * The &quot;Mongolian&quot; Unicode Block.
842         *
843         * @since 1.4
844         */
845        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
846        /**
847         * The &quot;Limbu&quot; Unicode Block.
848         *
849         * @since 1.5
850         */
851        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
852        /**
853         * The &quot;Tai Le&quot; Unicode Block.
854         *
855         * @since 1.5
856         */
857        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
858        /**
859         * The &quot;Khmer Symbols&quot; Unicode Block.
860         *
861         * @since 1.5
862         */
863        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
864        /**
865         * The &quot;Phonetic Extensions&quot; Unicode Block.
866         *
867         * @since 1.5
868         */
869        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
870        /**
871         * The &quot;Latin Extended Additional&quot; Unicode Block.
872         *
873         * @since 1.2
874         */
875        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
876        /**
877         * The &quot;Greek Extended&quot; Unicode Block.
878         *
879         * @since 1.2
880         */
881        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
882        /**
883         * The &quot;General Punctuation&quot; Unicode Block.
884         *
885         * @since 1.2
886         */
887        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
888        /**
889         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
890         *
891         * @since 1.2
892         */
893        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
894        /**
895         * The &quot;Currency Symbols&quot; Unicode Block.
896         *
897         * @since 1.2
898         */
899        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
900        /**
901         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
902         * Block. Previously referred to as &quot;Combining Marks for
903         * Symbols&quot;.
904         *
905         * @since 1.2
906         */
907        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
908        /**
909         * The &quot;Letterlike Symbols&quot; Unicode Block.
910         *
911         * @since 1.2
912         */
913        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
914        /**
915         * The &quot;Number Forms&quot; Unicode Block.
916         *
917         * @since 1.2
918         */
919        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
920        /**
921         * The &quot;Arrows&quot; Unicode Block.
922         *
923         * @since 1.2
924         */
925        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
926        /**
927         * The &quot;Mathematical Operators&quot; Unicode Block.
928         *
929         * @since 1.2
930         */
931        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
932        /**
933         * The &quot;Miscellaneous Technical&quot; Unicode Block.
934         *
935         * @since 1.2
936         */
937        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
938        /**
939         * The &quot;Control Pictures&quot; Unicode Block.
940         *
941         * @since 1.2
942         */
943        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
944        /**
945         * The &quot;Optical Character Recognition&quot; Unicode Block.
946         *
947         * @since 1.2
948         */
949        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
950        /**
951         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
952         *
953         * @since 1.2
954         */
955        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
956        /**
957         * The &quot;Box Drawing&quot; Unicode Block.
958         *
959         * @since 1.2
960         */
961        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
962        /**
963         * The &quot;Block Elements&quot; Unicode Block.
964         *
965         * @since 1.2
966         */
967        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
968        /**
969         * The &quot;Geometric Shapes&quot; Unicode Block.
970         *
971         * @since 1.2
972         */
973        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
974        /**
975         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
976         *
977         * @since 1.2
978         */
979        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
980        /**
981         * The &quot;Dingbats&quot; Unicode Block.
982         *
983         * @since 1.2
984         */
985        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
986        /**
987         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
988         *
989         * @since 1.5
990         */
991        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
992        /**
993         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
994         *
995         * @since 1.5
996         */
997        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
998        /**
999         * The &quot;Braille Patterns&quot; Unicode Block.
1000         *
1001         * @since 1.4
1002         */
1003        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1004        /**
1005         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1006         *
1007         * @since 1.5
1008         */
1009        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1010        /**
1011         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1012         *
1013         * @since 1.5
1014         */
1015        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1016        /**
1017         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1018         *
1019         * @since 1.5
1020         */
1021        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
        /**
         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1028        /**
1029         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1030         *
1031         * @since 1.4
1032         */
1033        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1034        /**
1035         * The &quot;Kangxi Radicals&quot; Unicode Block.
1036         *
1037         * @since 1.4
1038         */
1039        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1040        /**
1041         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1042         *
1043         * @since 1.4
1044         */
1045        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1046        /**
1047         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1048         *
1049         * @since 1.2
1050         */
1051        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1052        /**
1053         * The &quot;Hiragana&quot; Unicode Block.
1054         *
1055         * @since 1.2
1056         */
1057        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1058        /**
1059         * The &quot;Katakana&quot; Unicode Block.
1060         *
1061         * @since 1.2
1062         */
1063        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1064        /**
1065         * The &quot;Bopomofo&quot; Unicode Block.
1066         *
1067         * @since 1.2
1068         */
1069        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1070        /**
1071         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1072         *
1073         * @since 1.2
1074         */
1075        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1076        /**
1077         * The &quot;Kanbun&quot; Unicode Block.
1078         *
1079         * @since 1.2
1080         */
1081        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1082        /**
1083         * The &quot;Bopomofo Extended&quot; Unicode Block.
1084         *
1085         * @since 1.4
1086         */
1087        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1088        /**
1089         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1090         *
1091         * @since 1.5
1092         */
1093        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1094        /**
1095         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1096         *
1097         * @since 1.2
1098         */
1099        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1100        /**
1101         * The &quot;CJK Compatibility&quot; Unicode Block.
1102         *
1103         * @since 1.2
1104         */
1105        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1106        /**
1107         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1108         *
1109         * @since 1.4
1110         */
1111        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1112        /**
1113         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1114         *
1115         * @since 1.5
1116         */
1117        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1118        /**
1119         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1120         *
1121         * @since 1.2
1122         */
1123        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1124        /**
1125         * The &quot;Yi Syllables&quot; Unicode Block.
1126         *
1127         * @since 1.4
1128         */
1129        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1130        /**
1131         * The &quot;Yi Radicals&quot; Unicode Block.
1132         *
1133         * @since 1.4
1134         */
1135        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1136        /**
1137         * The &quot;Hangul Syllables&quot; Unicode Block.
1138         *
1139         * @since 1.2
1140         */
1141        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
        /**
         * The &quot;High Surrogates&quot; Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         *
         * @since 1.5
         */
        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
        /**
         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         *
         * @since 1.5
         */
        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
        /**
         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         *
         * @since 1.5
         */
        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1158        /**
1159         * The &quot;Private Use Area&quot; Unicode Block.
1160         *
1161         * @since 1.2
1162         */
1163        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1164        /**
1165         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1166         *
1167         * @since 1.2
1168         */
1169        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1170        /**
1171         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1172         *
1173         * @since 1.2
1174         */
1175        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1176        /**
1177         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1178         *
1179         * @since 1.2
1180         */
1181        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1182        /**
1183         * The &quot;Variation Selectors&quot; Unicode Block.
1184         *
1185         * @since 1.5
1186         */
1187        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1188        /**
1189         * The &quot;Combining Half Marks&quot; Unicode Block.
1190         *
1191         * @since 1.2
1192         */
1193        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1194        /**
1195         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1196         *
1197         * @since 1.2
1198         */
1199        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1200        /**
1201         * The &quot;Small Form Variants&quot; Unicode Block.
1202         *
1203         * @since 1.2
1204         */
1205        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1206        /**
1207         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1208         *
1209         * @since 1.2
1210         */
1211        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1212        /**
1213         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1214         *
1215         * @since 1.2
1216         */
1217        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1218        /**
1219         * The &quot;Specials&quot; Unicode Block.
1220         *
1221         * @since 1.2
1222         */
1223        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
        /**
         * The &quot;Linear B Syllabary&quot; Unicode Block.
         *
         * @since 1.5
         */
        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1230        /**
1231         * The &quot;Linear B Ideograms&quot; Unicode Block.
1232         *
1233         * @since 1.5
1234         */
1235        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1236        /**
1237         * The &quot;Aegean Numbers&quot; Unicode Block.
1238         *
1239         * @since 1.5
1240         */
1241        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1242        /**
1243         * The &quot;Old Italic&quot; Unicode Block.
1244         *
1245         * @since 1.5
1246         */
1247        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1248        /**
1249         * The &quot;Gothic&quot; Unicode Block.
1250         *
1251         * @since 1.5
1252         */
1253        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1254        /**
1255         * The &quot;Ugaritic&quot; Unicode Block.
1256         *
1257         * @since 1.5
1258         */
1259        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1260        /**
1261         * The &quot;Deseret&quot; Unicode Block.
1262         *
1263         * @since 1.5
1264         */
1265        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1266        /**
1267         * The &quot;Shavian&quot; Unicode Block.
1268         *
1269         * @since 1.5
1270         */
1271        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1272        /**
1273         * The &quot;Osmanya&quot; Unicode Block.
1274         *
1275         * @since 1.5
1276         */
1277        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1278        /**
1279         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1280         *
1281         * @since 1.5
1282         */
1283        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1284        /**
1285         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1286         *
1287         * @since 1.5
1288         */
1289        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1290        /**
1291         * The &quot;Musical Symbols&quot; Unicode Block.
1292         *
1293         * @since 1.5
1294         */
1295        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1296        /**
1297         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1298         *
1299         * @since 1.5
1300         */
1301        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1302        /**
1303         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1304         *
1305         * @since 1.5
1306         */
1307        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1308        /**
1309         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1310         *
1311         * @since 1.5
1312         */
1313        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1314        /**
1315         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1316         *
1317         * @since 1.5
1318         */
1319        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1320        /**
1321         * The &quot;Tags&quot; Unicode Block.
1322         *
1323         * @since 1.5
1324         */
1325        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1326        /**
1327         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1328         *
1329         * @since 1.5
1330         */
1331        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1332        /**
1333         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1334         *
1335         * @since 1.5
1336         */
1337        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1338        /**
1339         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1340         *
1341         * @since 1.5
1342         */
1343        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1344
1345        /*
1346         * All of the UnicodeBlocks with valid ranges in ascending order.
1347         */
1348        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1349            null,
1350            UnicodeBlock.BASIC_LATIN,
1351            UnicodeBlock.LATIN_1_SUPPLEMENT,
1352            UnicodeBlock.LATIN_EXTENDED_A,
1353            UnicodeBlock.LATIN_EXTENDED_B,
1354            UnicodeBlock.IPA_EXTENSIONS,
1355            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1356            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1357            UnicodeBlock.GREEK,
1358            UnicodeBlock.CYRILLIC,
1359            UnicodeBlock.ARMENIAN,
1360            UnicodeBlock.HEBREW,
1361            UnicodeBlock.ARABIC,
1362            UnicodeBlock.SYRIAC,
1363            UnicodeBlock.THAANA,
1364            UnicodeBlock.DEVANAGARI,
1365            UnicodeBlock.BENGALI,
1366            UnicodeBlock.GURMUKHI,
1367            UnicodeBlock.GUJARATI,
1368            UnicodeBlock.ORIYA,
1369            UnicodeBlock.TAMIL,
1370            UnicodeBlock.TELUGU,
1371            UnicodeBlock.KANNADA,
1372            UnicodeBlock.MALAYALAM,
1373            UnicodeBlock.SINHALA,
1374            UnicodeBlock.THAI,
1375            UnicodeBlock.LAO,
1376            UnicodeBlock.TIBETAN,
1377            UnicodeBlock.MYANMAR,
1378            UnicodeBlock.GEORGIAN,
1379            UnicodeBlock.HANGUL_JAMO,
1380            UnicodeBlock.ETHIOPIC,
1381            UnicodeBlock.CHEROKEE,
1382            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1383            UnicodeBlock.OGHAM,
1384            UnicodeBlock.RUNIC,
1385            UnicodeBlock.KHMER,
1386            UnicodeBlock.MONGOLIAN,
1387            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1388            UnicodeBlock.GREEK_EXTENDED,
1389            UnicodeBlock.GENERAL_PUNCTUATION,
1390            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1391            UnicodeBlock.CURRENCY_SYMBOLS,
1392            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1393            UnicodeBlock.LETTERLIKE_SYMBOLS,
1394            UnicodeBlock.NUMBER_FORMS,
1395            UnicodeBlock.ARROWS,
1396            UnicodeBlock.MATHEMATICAL_OPERATORS,
1397            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1398            UnicodeBlock.CONTROL_PICTURES,
1399            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1400            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1401            UnicodeBlock.BOX_DRAWING,
1402            UnicodeBlock.BLOCK_ELEMENTS,
1403            UnicodeBlock.GEOMETRIC_SHAPES,
1404            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1405            UnicodeBlock.DINGBATS,
1406            UnicodeBlock.BRAILLE_PATTERNS,
1407            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1408            UnicodeBlock.KANGXI_RADICALS,
1409            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1410            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1411            UnicodeBlock.HIRAGANA,
1412            UnicodeBlock.KATAKANA,
1413            UnicodeBlock.BOPOMOFO,
1414            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1415            UnicodeBlock.KANBUN,
1416            UnicodeBlock.BOPOMOFO_EXTENDED,
1417            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1418            UnicodeBlock.CJK_COMPATIBILITY,
1419            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1420            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1421            UnicodeBlock.YI_SYLLABLES,
1422            UnicodeBlock.YI_RADICALS,
1423            UnicodeBlock.HANGUL_SYLLABLES,
1424            UnicodeBlock.HIGH_SURROGATES,
1425            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1426            UnicodeBlock.LOW_SURROGATES,
1427            UnicodeBlock.PRIVATE_USE_AREA,
1428            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1429            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1430            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1431            UnicodeBlock.COMBINING_HALF_MARKS,
1432            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1433            UnicodeBlock.SMALL_FORM_VARIANTS,
1434            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1435            UnicodeBlock.SPECIALS,
1436            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1437            UnicodeBlock.OLD_ITALIC,
1438            UnicodeBlock.GOTHIC,
1439            UnicodeBlock.DESERET,
1440            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1441            UnicodeBlock.MUSICAL_SYMBOLS,
1442            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1443            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1444            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1445            UnicodeBlock.TAGS,
1446            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1447            UnicodeBlock.TAGALOG,
1448            UnicodeBlock.HANUNOO,
1449            UnicodeBlock.BUHID,
1450            UnicodeBlock.TAGBANWA,
1451            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1452            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1453            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1454            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1455            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1456            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1457            UnicodeBlock.VARIATION_SELECTORS,
1458            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1459            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1460            UnicodeBlock.LIMBU,
1461            UnicodeBlock.TAI_LE,
1462            UnicodeBlock.KHMER_SYMBOLS,
1463            UnicodeBlock.PHONETIC_EXTENSIONS,
1464            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1465            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1466            UnicodeBlock.LINEAR_B_SYLLABARY,
1467            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1468            UnicodeBlock.AEGEAN_NUMBERS,
1469            UnicodeBlock.UGARITIC,
1470            UnicodeBlock.SHAVIAN,
1471            UnicodeBlock.OSMANYA,
1472            UnicodeBlock.CYPRIOT_SYLLABARY,
1473            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1474            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
1475        };
1476
1477        /**
1478         * Retrieves the constant that corresponds to the specified block name.
1479         * The block names are defined by the Unicode 4.0.1 specification in the
1480         * {@code Blocks-4.0.1.txt} file.
1481         * <p>
1482         * Block names may be one of the following:
1483         * <ul>
1484         * <li>Canonical block name, as defined by the Unicode specification;
1485         * case-insensitive.</li>
1486         * <li>Canonical block name without any spaces, as defined by the
1487         * Unicode specification; case-insensitive.</li>
1488         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1489         * uppercasing the canonical name and replacing all spaces and hyphens
1490         * with underscores.</li>
1491         * </ul>
1492         *
1493         * @param blockName
1494         *            the name of the block to retrieve.
1495         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1496         * @throws NullPointerException
1497         *             if {@code blockName} is {@code null}.
1498         * @throws IllegalArgumentException
1499         *             if {@code blockName} is not a valid block name.
1500         * @since 1.5
1501         */
1502        public static UnicodeBlock forName(String blockName) {
1503            if (blockName == null) {
1504                throw new NullPointerException();
1505            }
1506            int block = forNameImpl(blockName);
1507            if (block == -1) {
1508                if (blockName.equals("SURROGATES_AREA")) {
1509                    return SURROGATES_AREA;
1510                } else if(blockName.equalsIgnoreCase("greek")) {
1511                    return GREEK;
1512                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1513                        blockName.equals("Combining Marks for Symbols") ||
1514                        blockName.equals("CombiningMarksforSymbols")) {
1515                    return COMBINING_MARKS_FOR_SYMBOLS;
1516                }
1517                throw new IllegalArgumentException();
1518            }
1519            return BLOCKS[block];
1520        }
1521
1522        /**
1523         * Gets the constant for the Unicode block that contains the specified
1524         * character.
1525         *
1526         * @param c
1527         *            the character for which to get the {@code UnicodeBlock}
1528         *            constant.
1529         * @return the {@code UnicodeBlock} constant for the block that contains
1530         *         {@code c}, or {@code null} if {@code c} does not belong to
1531         *         any defined block.
1532         */
1533        public static UnicodeBlock of(char c) {
1534            return of((int) c);
1535        }
1536
1537        /**
1538         * Gets the constant for the Unicode block that contains the specified
1539         * Unicode code point.
1540         *
1541         * @param codePoint
1542         *            the Unicode code point for which to get the
1543         *            {@code UnicodeBlock} constant.
1544         * @return the {@code UnicodeBlock} constant for the block that contains
1545         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1546         *         not belong to any defined block.
1547         * @throws IllegalArgumentException
1548         *             if {@code codePoint} is not a valid Unicode code point.
1549         * @since 1.5
1550         */
1551        public static UnicodeBlock of(int codePoint) {
1552            if (!isValidCodePoint(codePoint)) {
1553                throw new IllegalArgumentException();
1554            }
1555            int block = ofImpl(codePoint);
1556            if (block == -1 || block >= BLOCKS.length) {
1557                return null;
1558            }
1559            return BLOCKS[block];
1560        }
1561
        /**
         * Creates a block constant with the given name.
         *
         * Only {@code blockName} is used: it is handed to the superclass
         * constructor. {@code start} and {@code end} are accepted so each
         * constant can state its code point range at the declaration site, but
         * this constructor does not store or validate them.
         *
         * @param blockName
         *            the name passed to the superclass constructor.
         * @param start
         *            first code point of the block; ignored here.
         * @param end
         *            last code point of the block; ignored here.
         */
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
1565    }
1566
    // Native lookup of a Unicode block by name. UnicodeBlock.forName uses the
    // result as an index into UnicodeBlock.BLOCKS and treats -1 as "name not
    // recognized".
    private static native int forNameImpl(String blockName);
1568
    // Native lookup of the Unicode block containing a code point.
    // UnicodeBlock.of(int) uses the result as an index into
    // UnicodeBlock.BLOCKS and treats -1, or an index at or beyond the table
    // length, as "no block".
    private static native int ofImpl(int codePoint);
1570
    /**
     * Constructs a new {@code Character} that wraps the specified primitive
     * {@code char} value.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
1581
    /**
     * Gets the primitive {@code char} value wrapped by this object.
     *
     * @return this object's primitive value.
     */
    public char charValue() {
        return value;
    }
1590
1591    /**
1592     * Compares this object to the specified character object to determine their
1593     * relative order.
1594     *
1595     * @param c
1596     *            the character object to compare this object to.
1597     * @return {@code 0} if the value of this character and the value of
1598     *         {@code c} are equal; a positive value if the value of this
1599     *         character is greater than the value of {@code c}; a negative
1600     *         value if the value of this character is less than the value of
1601     *         {@code c}.
1602     * @see java.lang.Comparable
1603     * @since 1.2
1604     */
1605    public int compareTo(Character c) {
1606        return value - c.value;
1607    }
1608
1609    /**
1610     * Returns a {@code Character} instance for the {@code char} value passed.
1611     * <p>
1612     * If it is not necessary to get a new {@code Character} instance, it is
1613     * recommended to use this method instead of the constructor, since it
1614     * maintains a cache of instances which may result in better performance.
1615     *
1616     * @param c
1617     *            the char value for which to get a {@code Character} instance.
1618     * @return the {@code Character} instance for {@code c}.
1619     * @since 1.5
1620     */
1621    public static Character valueOf(char c) {
1622        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1623    }
1624
1625    /**
1626     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1627     */
1628    private static final Character[] SMALL_VALUES = new Character[128];
1629
1630    static {
1631        for(int i = 0; i < 128; i++) {
1632            SMALL_VALUES[i] = new Character((char) i);
1633        }
1634    }
1635    /**
1636     * Indicates whether {@code codePoint} is a valid Unicode code point.
1637     *
1638     * @param codePoint
1639     *            the code point to test.
1640     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1641     *         {@code false} otherwise.
1642     * @since 1.5
1643     */
1644    public static boolean isValidCodePoint(int codePoint) {
1645        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1646    }
1647
1648    /**
1649     * Indicates whether {@code codePoint} is within the supplementary code
1650     * point range.
1651     *
1652     * @param codePoint
1653     *            the code point to test.
1654     * @return {@code true} if {@code codePoint} is within the supplementary
1655     *         code point range; {@code false} otherwise.
1656     * @since 1.5
1657     */
1658    public static boolean isSupplementaryCodePoint(int codePoint) {
1659        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1660    }
1661
1662    /**
1663     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1664     * that is used for representing supplementary characters in UTF-16
1665     * encoding.
1666     *
1667     * @param ch
1668     *            the character to test.
1669     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1670     *         {@code false} otherwise.
1671     * @see #isLowSurrogate(char)
1672     * @since 1.5
1673     */
1674    public static boolean isHighSurrogate(char ch) {
1675        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1676    }
1677
1678    /**
1679     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1680     * that is used for representing supplementary characters in UTF-16
1681     * encoding.
1682     *
1683     * @param ch
1684     *            the character to test.
1685     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1686     *         {@code false} otherwise.
1687     * @see #isHighSurrogate(char)
1688     * @since 1.5
1689     */
1690    public static boolean isLowSurrogate(char ch) {
1691        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1692    }
1693
1694    /**
1695     * Indicates whether the specified character pair is a valid surrogate pair.
1696     *
1697     * @param high
1698     *            the high surrogate unit to test.
1699     * @param low
1700     *            the low surrogate unit to test.
1701     * @return {@code true} if {@code high} is a high-surrogate code unit and
1702     *         {@code low} is a low-surrogate code unit; {@code false}
1703     *         otherwise.
1704     * @see #isHighSurrogate(char)
1705     * @see #isLowSurrogate(char)
1706     * @since 1.5
1707     */
1708    public static boolean isSurrogatePair(char high, char low) {
1709        return (isHighSurrogate(high) && isLowSurrogate(low));
1710    }
1711
1712    /**
1713     * Calculates the number of {@code char} values required to represent the
1714     * specified Unicode code point. This method checks if the {@code codePoint}
1715     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1716     * returned, otherwise {@code 1}. To test if the code point is valid, use
1717     * the {@link #isValidCodePoint(int)} method.
1718     *
1719     * @param codePoint
1720     *            the code point for which to calculate the number of required
1721     *            chars.
1722     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1723     * @see #isValidCodePoint(int)
1724     * @see #isSupplementaryCodePoint(int)
1725     * @since 1.5
1726     */
1727    public static int charCount(int codePoint) {
1728        return (codePoint >= 0x10000 ? 2 : 1);
1729    }
1730
1731    /**
1732     * Converts a surrogate pair into a Unicode code point. This method assumes
1733     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1734     * surrogates, then the result is indeterminate. The
1735     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1736     * method to validate the pair.
1737     *
1738     * @param high
1739     *            the high surrogate unit.
1740     * @param low
1741     *            the low surrogate unit.
1742     * @return the Unicode code point corresponding to the surrogate unit pair.
1743     * @see #isSurrogatePair(char, char)
1744     * @since 1.5
1745     */
1746    public static int toCodePoint(char high, char low) {
1747        // See RFC 2781, Section 2.2
1748        // http://www.ietf.org/rfc/rfc2781.txt
1749        int h = (high & 0x3FF) << 10;
1750        int l = low & 0x3FF;
1751        return (h | l) + 0x10000;
1752    }
1753
1754    /**
1755     * Returns the code point at {@code index} in the specified sequence of
1756     * character units. If the unit at {@code index} is a high-surrogate unit,
1757     * {@code index + 1} is less than the length of the sequence and the unit at
1758     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1759     * point represented by the pair is returned; otherwise the {@code char}
1760     * value at {@code index} is returned.
1761     *
1762     * @param seq
1763     *            the source sequence of {@code char} units.
1764     * @param index
1765     *            the position in {@code seq} from which to retrieve the code
1766     *            point.
1767     * @return the Unicode code point or {@code char} value at {@code index} in
1768     *         {@code seq}.
1769     * @throws NullPointerException
1770     *             if {@code seq} is {@code null}.
1771     * @throws IndexOutOfBoundsException
1772     *             if the {@code index} is negative or greater than or equal to
1773     *             the length of {@code seq}.
1774     * @since 1.5
1775     */
1776    public static int codePointAt(CharSequence seq, int index) {
1777        if (seq == null) {
1778            throw new NullPointerException();
1779        }
1780        int len = seq.length();
1781        if (index < 0 || index >= len) {
1782            throw new IndexOutOfBoundsException();
1783        }
1784
1785        char high = seq.charAt(index++);
1786        if (index >= len) {
1787            return high;
1788        }
1789        char low = seq.charAt(index);
1790        if (isSurrogatePair(high, low)) {
1791            return toCodePoint(high, low);
1792        }
1793        return high;
1794    }
1795
1796    /**
1797     * Returns the code point at {@code index} in the specified array of
1798     * character units. If the unit at {@code index} is a high-surrogate unit,
1799     * {@code index + 1} is less than the length of the array and the unit at
1800     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1801     * point represented by the pair is returned; otherwise the {@code char}
1802     * value at {@code index} is returned.
1803     *
1804     * @param seq
1805     *            the source array of {@code char} units.
1806     * @param index
1807     *            the position in {@code seq} from which to retrieve the code
1808     *            point.
1809     * @return the Unicode code point or {@code char} value at {@code index} in
1810     *         {@code seq}.
1811     * @throws NullPointerException
1812     *             if {@code seq} is {@code null}.
1813     * @throws IndexOutOfBoundsException
1814     *             if the {@code index} is negative or greater than or equal to
1815     *             the length of {@code seq}.
1816     * @since 1.5
1817     */
1818    public static int codePointAt(char[] seq, int index) {
1819        if (seq == null) {
1820            throw new NullPointerException();
1821        }
1822        int len = seq.length;
1823        if (index < 0 || index >= len) {
1824            throw new IndexOutOfBoundsException();
1825        }
1826
1827        char high = seq[index++];
1828        if (index >= len) {
1829            return high;
1830        }
1831        char low = seq[index];
1832        if (isSurrogatePair(high, low)) {
1833            return toCodePoint(high, low);
1834        }
1835        return high;
1836    }
1837
1838    /**
1839     * Returns the code point at {@code index} in the specified array of
1840     * character units, where {@code index} has to be less than {@code limit}.
1841     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1842     * is less than {@code limit} and the unit at {@code index + 1} is a
1843     * low-surrogate unit, then the supplementary code point represented by the
1844     * pair is returned; otherwise the {@code char} value at {@code index} is
1845     * returned.
1846     *
1847     * @param seq
1848     *            the source array of {@code char} units.
1849     * @param index
1850     *            the position in {@code seq} from which to get the code point.
1851     * @param limit
1852     *            the index after the last unit in {@code seq} that can be used.
1853     * @return the Unicode code point or {@code char} value at {@code index} in
1854     *         {@code seq}.
1855     * @throws NullPointerException
1856     *             if {@code seq} is {@code null}.
1857     * @throws IndexOutOfBoundsException
1858     *             if {@code index < 0}, {@code index >= limit},
1859     *             {@code limit < 0} or if {@code limit} is greater than the
1860     *             length of {@code seq}.
1861     * @since 1.5
1862     */
1863    public static int codePointAt(char[] seq, int index, int limit) {
1864        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1865            throw new IndexOutOfBoundsException();
1866        }
1867
1868        char high = seq[index++];
1869        if (index >= limit) {
1870            return high;
1871        }
1872        char low = seq[index];
1873        if (isSurrogatePair(high, low)) {
1874            return toCodePoint(high, low);
1875        }
1876        return high;
1877    }
1878
1879    /**
1880     * Returns the code point that precedes {@code index} in the specified
1881     * sequence of character units. If the unit at {@code index - 1} is a
1882     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1883     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1884     * point represented by the pair is returned; otherwise the {@code char}
1885     * value at {@code index - 1} is returned.
1886     *
1887     * @param seq
1888     *            the source sequence of {@code char} units.
1889     * @param index
1890     *            the position in {@code seq} following the code
1891     *            point that should be returned.
1892     * @return the Unicode code point or {@code char} value before {@code index}
1893     *         in {@code seq}.
1894     * @throws NullPointerException
1895     *             if {@code seq} is {@code null}.
1896     * @throws IndexOutOfBoundsException
1897     *             if the {@code index} is less than 1 or greater than the
1898     *             length of {@code seq}.
1899     * @since 1.5
1900     */
1901    public static int codePointBefore(CharSequence seq, int index) {
1902        if (seq == null) {
1903            throw new NullPointerException();
1904        }
1905        int len = seq.length();
1906        if (index < 1 || index > len) {
1907            throw new IndexOutOfBoundsException();
1908        }
1909
1910        char low = seq.charAt(--index);
1911        if (--index < 0) {
1912            return low;
1913        }
1914        char high = seq.charAt(index);
1915        if (isSurrogatePair(high, low)) {
1916            return toCodePoint(high, low);
1917        }
1918        return low;
1919    }
1920
1921    /**
1922     * Returns the code point that precedes {@code index} in the specified
1923     * array of character units. If the unit at {@code index - 1} is a
1924     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1925     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1926     * point represented by the pair is returned; otherwise the {@code char}
1927     * value at {@code index - 1} is returned.
1928     *
1929     * @param seq
1930     *            the source array of {@code char} units.
1931     * @param index
1932     *            the position in {@code seq} following the code
1933     *            point that should be returned.
1934     * @return the Unicode code point or {@code char} value before {@code index}
1935     *         in {@code seq}.
1936     * @throws NullPointerException
1937     *             if {@code seq} is {@code null}.
1938     * @throws IndexOutOfBoundsException
1939     *             if the {@code index} is less than 1 or greater than the
1940     *             length of {@code seq}.
1941     * @since 1.5
1942     */
1943    public static int codePointBefore(char[] seq, int index) {
1944        if (seq == null) {
1945            throw new NullPointerException();
1946        }
1947        int len = seq.length;
1948        if (index < 1 || index > len) {
1949            throw new IndexOutOfBoundsException();
1950        }
1951
1952        char low = seq[--index];
1953        if (--index < 0) {
1954            return low;
1955        }
1956        char high = seq[index];
1957        if (isSurrogatePair(high, low)) {
1958            return toCodePoint(high, low);
1959        }
1960        return low;
1961    }
1962
1963    /**
1964     * Returns the code point that precedes the {@code index} in the specified
1965     * array of character units and is not less than {@code start}. If the unit
1966     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1967     * less than {@code start} and the unit at {@code index - 2} is a
1968     * high-surrogate unit, then the supplementary code point represented by the
1969     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1970     * is returned.
1971     *
1972     * @param seq
1973     *            the source array of {@code char} units.
1974     * @param index
1975     *            the position in {@code seq} following the code point that
1976     *            should be returned.
1977     * @param start
1978     *            the index of the first element in {@code seq}.
1979     * @return the Unicode code point or {@code char} value before {@code index}
1980     *         in {@code seq}.
1981     * @throws NullPointerException
1982     *             if {@code seq} is {@code null}.
1983     * @throws IndexOutOfBoundsException
1984     *             if the {@code index <= start}, {@code start < 0},
1985     *             {@code index} is greater than the length of {@code seq}, or
1986     *             if {@code start} is equal or greater than the length of
1987     *             {@code seq}.
1988     * @since 1.5
1989     */
1990    public static int codePointBefore(char[] seq, int index, int start) {
1991        if (seq == null) {
1992            throw new NullPointerException();
1993        }
1994        int len = seq.length;
1995        if (index <= start || index > len || start < 0 || start >= len) {
1996            throw new IndexOutOfBoundsException();
1997        }
1998
1999        char low = seq[--index];
2000        if (--index < start) {
2001            return low;
2002        }
2003        char high = seq[index];
2004        if (isSurrogatePair(high, low)) {
2005            return toCodePoint(high, low);
2006        }
2007        return low;
2008    }
2009
2010    /**
2011     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2012     * and copies the value(s) into the char array {@code dst}, starting at
2013     * index {@code dstIndex}.
2014     *
2015     * @param codePoint
2016     *            the Unicode code point to encode.
2017     * @param dst
2018     *            the destination array to copy the encoded value into.
2019     * @param dstIndex
2020     *            the index in {@code dst} from where to start copying.
2021     * @return the number of {@code char} value units copied into {@code dst}.
2022     * @throws IllegalArgumentException
2023     *             if {@code codePoint} is not a valid Unicode code point.
2024     * @throws NullPointerException
2025     *             if {@code dst} is {@code null}.
2026     * @throws IndexOutOfBoundsException
2027     *             if {@code dstIndex} is negative, greater than or equal to
2028     *             {@code dst.length} or equals {@code dst.length - 1} when
2029     *             {@code codePoint} is a
2030     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2031     * @since 1.5
2032     */
2033    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2034        if (!isValidCodePoint(codePoint)) {
2035            throw new IllegalArgumentException();
2036        }
2037        if (dst == null) {
2038            throw new NullPointerException();
2039        }
2040        if (dstIndex < 0 || dstIndex >= dst.length) {
2041            throw new IndexOutOfBoundsException();
2042        }
2043
2044        if (isSupplementaryCodePoint(codePoint)) {
2045            if (dstIndex == dst.length - 1) {
2046                throw new IndexOutOfBoundsException();
2047            }
2048            // See RFC 2781, Section 2.1
2049            // http://www.ietf.org/rfc/rfc2781.txt
2050            int cpPrime = codePoint - 0x10000;
2051            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2052            int low = 0xDC00 | (cpPrime & 0x3FF);
2053            dst[dstIndex] = (char) high;
2054            dst[dstIndex + 1] = (char) low;
2055            return 2;
2056        }
2057
2058        dst[dstIndex] = (char) codePoint;
2059        return 1;
2060    }
2061
2062    /**
2063     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2064     * and returns it as a char array.
2065     *
2066     * @param codePoint
2067     *            the Unicode code point to encode.
2068     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2069     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2070     *         then the returned array contains two characters, otherwise it
2071     *         contains just one character.
2072     * @throws IllegalArgumentException
2073     *             if {@code codePoint} is not a valid Unicode code point.
2074     * @since 1.5
2075     */
2076    public static char[] toChars(int codePoint) {
2077        if (!isValidCodePoint(codePoint)) {
2078            throw new IllegalArgumentException();
2079        }
2080
2081        if (isSupplementaryCodePoint(codePoint)) {
2082            int cpPrime = codePoint - 0x10000;
2083            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2084            int low = 0xDC00 | (cpPrime & 0x3FF);
2085            return new char[] { (char) high, (char) low };
2086        }
2087        return new char[] { (char) codePoint };
2088    }
2089
2090    /**
2091     * Counts the number of Unicode code points in the subsequence of the
2092     * specified character sequence, as delineated by {@code beginIndex} and
2093     * {@code endIndex}. Any surrogate values with missing pair values will be
2094     * counted as one code point.
2095     *
2096     * @param seq
2097     *            the {@code CharSequence} to look through.
2098     * @param beginIndex
2099     *            the inclusive index to begin counting at.
2100     * @param endIndex
2101     *            the exclusive index to stop counting at.
2102     * @return the number of Unicode code points.
2103     * @throws NullPointerException
2104     *             if {@code seq} is {@code null}.
2105     * @throws IndexOutOfBoundsException
2106     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2107     *             if {@code endIndex} is greater than the length of {@code seq}.
2108     * @since 1.5
2109     */
2110    public static int codePointCount(CharSequence seq, int beginIndex,
2111            int endIndex) {
2112        if (seq == null) {
2113            throw new NullPointerException();
2114        }
2115        int len = seq.length();
2116        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2117            throw new IndexOutOfBoundsException();
2118        }
2119
2120        int result = 0;
2121        for (int i = beginIndex; i < endIndex; i++) {
2122            char c = seq.charAt(i);
2123            if (isHighSurrogate(c)) {
2124                if (++i < endIndex) {
2125                    c = seq.charAt(i);
2126                    if (!isLowSurrogate(c)) {
2127                        result++;
2128                    }
2129                }
2130            }
2131            result++;
2132        }
2133        return result;
2134    }
2135
2136    /**
2137     * Counts the number of Unicode code points in the subsequence of the
2138     * specified char array, as delineated by {@code offset} and {@code count}.
2139     * Any surrogate values with missing pair values will be counted as one code
2140     * point.
2141     *
2142     * @param seq
2143     *            the char array to look through
2144     * @param offset
2145     *            the inclusive index to begin counting at.
2146     * @param count
2147     *            the number of {@code char} values to look through in
2148     *            {@code seq}.
2149     * @return the number of Unicode code points.
2150     * @throws NullPointerException
2151     *             if {@code seq} is {@code null}.
2152     * @throws IndexOutOfBoundsException
2153     *             if {@code offset < 0}, {@code count < 0} or if
2154     *             {@code offset + count} is greater than the length of
2155     *             {@code seq}.
2156     * @since 1.5
2157     */
2158    public static int codePointCount(char[] seq, int offset, int count) {
2159        Arrays.checkOffsetAndCount(seq.length, offset, count);
2160        int endIndex = offset + count;
2161        int result = 0;
2162        for (int i = offset; i < endIndex; i++) {
2163            char c = seq[i];
2164            if (isHighSurrogate(c)) {
2165                if (++i < endIndex) {
2166                    c = seq[i];
2167                    if (!isLowSurrogate(c)) {
2168                        result++;
2169                    }
2170                }
2171            }
2172            result++;
2173        }
2174        return result;
2175    }
2176
2177    /**
2178     * Determines the index in the specified character sequence that is offset
2179     * {@code codePointOffset} code points from {@code index}.
2180     *
2181     * @param seq
2182     *            the character sequence to find the index in.
2183     * @param index
2184     *            the start index in {@code seq}.
2185     * @param codePointOffset
2186     *            the number of code points to look backwards or forwards; may
2187     *            be a negative or positive value.
2188     * @return the index in {@code seq} that is {@code codePointOffset} code
2189     *         points away from {@code index}.
2190     * @throws NullPointerException
2191     *             if {@code seq} is {@code null}.
2192     * @throws IndexOutOfBoundsException
2193     *             if {@code index < 0}, {@code index} is greater than the
2194     *             length of {@code seq}, or if there are not enough values in
2195     *             {@code seq} to skip {@code codePointOffset} code points
2196     *             forwards or backwards (if {@code codePointOffset} is
2197     *             negative) from {@code index}.
2198     * @since 1.5
2199     */
2200    public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2201        if (seq == null) {
2202            throw new NullPointerException();
2203        }
2204        int len = seq.length();
2205        if (index < 0 || index > len) {
2206            throw new IndexOutOfBoundsException();
2207        }
2208
2209        if (codePointOffset == 0) {
2210            return index;
2211        }
2212
2213        if (codePointOffset > 0) {
2214            int codePoints = codePointOffset;
2215            int i = index;
2216            while (codePoints > 0) {
2217                codePoints--;
2218                if (i >= len) {
2219                    throw new IndexOutOfBoundsException();
2220                }
2221                if (isHighSurrogate(seq.charAt(i))) {
2222                    int next = i + 1;
2223                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2224                        i++;
2225                    }
2226                }
2227                i++;
2228            }
2229            return i;
2230        }
2231
2232        int codePoints = -codePointOffset;
2233        int i = index;
2234        while (codePoints > 0) {
2235            codePoints--;
2236            i--;
2237            if (i < 0) {
2238                throw new IndexOutOfBoundsException();
2239            }
2240            if (isLowSurrogate(seq.charAt(i))) {
2241                int prev = i - 1;
2242                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2243                    i--;
2244                }
2245            }
2246        }
2247        return i;
2248    }
2249
2250    /**
2251     * Determines the index in a subsequence of the specified character array
2252     * that is offset {@code codePointOffset} code points from {@code index}.
2253     * The subsequence is delineated by {@code start} and {@code count}.
2254     *
2255     * @param seq
2256     *            the character array to find the index in.
2257     * @param start
2258     *            the inclusive index that marks the beginning of the
2259     *            subsequence.
2260     * @param count
2261     *            the number of {@code char} values to include within the
2262     *            subsequence.
2263     * @param index
2264     *            the start index in the subsequence of the char array.
2265     * @param codePointOffset
2266     *            the number of code points to look backwards or forwards; may
2267     *            be a negative or positive value.
2268     * @return the index in {@code seq} that is {@code codePointOffset} code
2269     *         points away from {@code index}.
2270     * @throws NullPointerException
2271     *             if {@code seq} is {@code null}.
2272     * @throws IndexOutOfBoundsException
2273     *             if {@code start < 0}, {@code count < 0},
2274     *             {@code index < start}, {@code index > start + count},
2275     *             {@code start + count} is greater than the length of
2276     *             {@code seq}, or if there are not enough values in
2277     *             {@code seq} to skip {@code codePointOffset} code points
2278     *             forward or backward (if {@code codePointOffset} is
2279     *             negative) from {@code index}.
2280     * @since 1.5
2281     */
2282    public static int offsetByCodePoints(char[] seq, int start, int count,
2283            int index, int codePointOffset) {
2284        Arrays.checkOffsetAndCount(seq.length, start, count);
2285        int end = start + count;
2286        if (index < start || index > end) {
2287            throw new IndexOutOfBoundsException();
2288        }
2289
2290        if (codePointOffset == 0) {
2291            return index;
2292        }
2293
2294        if (codePointOffset > 0) {
2295            int codePoints = codePointOffset;
2296            int i = index;
2297            while (codePoints > 0) {
2298                codePoints--;
2299                if (i >= end) {
2300                    throw new IndexOutOfBoundsException();
2301                }
2302                if (isHighSurrogate(seq[i])) {
2303                    int next = i + 1;
2304                    if (next < end && isLowSurrogate(seq[next])) {
2305                        i++;
2306                    }
2307                }
2308                i++;
2309            }
2310            return i;
2311        }
2312
2313        int codePoints = -codePointOffset;
2314        int i = index;
2315        while (codePoints > 0) {
2316            codePoints--;
2317            i--;
2318            if (i < start) {
2319                throw new IndexOutOfBoundsException();
2320            }
2321            if (isLowSurrogate(seq[i])) {
2322                int prev = i - 1;
2323                if (prev >= start && isHighSurrogate(seq[prev])) {
2324                    i--;
2325                }
2326            }
2327        }
2328        return i;
2329    }
2330
2331    /**
2332     * Convenience method to determine the value of the specified character
2333     * {@code c} in the supplied radix. The value of {@code radix} must be
2334     * between MIN_RADIX and MAX_RADIX.
2335     *
2336     * @param c
2337     *            the character to determine the value of.
2338     * @param radix
2339     *            the radix.
2340     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2341     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2342     */
2343    public static int digit(char c, int radix) {
2344        return digit((int) c, radix);
2345    }
2346
2347    /**
2348     * Convenience method to determine the value of the character
2349     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2350     * be between MIN_RADIX and MAX_RADIX.
2351     *
2352     * @param codePoint
2353     *            the character, including supplementary characters.
2354     * @param radix
2355     *            the radix.
2356     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2357     *         {@link #MAX_RADIX} then the value of the character in the radix;
2358     *         -1 otherwise.
2359     */
2360    public static int digit(int codePoint, int radix) {
2361        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2362            return -1;
2363        }
2364        if (codePoint < 128) {
2365            // Optimized for ASCII
2366            int result = -1;
2367            if ('0' <= codePoint && codePoint <= '9') {
2368                result = codePoint - '0';
2369            } else if ('a' <= codePoint && codePoint <= 'z') {
2370                result = 10 + (codePoint - 'a');
2371            } else if ('A' <= codePoint && codePoint <= 'Z') {
2372                result = 10 + (codePoint - 'A');
2373            }
2374            return result < radix ? result : -1;
2375        }
2376        return digitImpl(codePoint, radix);
2377    }
2378
    // Native digit lookup. digit(int, int) calls it only for code points of
    // 128 and above, after the radix has been range-checked; lower code points
    // are resolved in Java.
    private static native int digitImpl(int codePoint, int radix);
2380
2381    /**
2382     * Compares this object with the specified object and indicates if they are
2383     * equal. In order to be equal, {@code object} must be an instance of
2384     * {@code Character} and have the same char value as this object.
2385     *
2386     * @param object
2387     *            the object to compare this double with.
2388     * @return {@code true} if the specified object is equal to this
2389     *         {@code Character}; {@code false} otherwise.
2390     */
2391    @Override
2392    public boolean equals(Object object) {
2393        return (object instanceof Character) && (value == ((Character) object).value);
2394    }
2395
2396    /**
2397     * Returns the character which represents the specified digit in the
2398     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2399     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2400     * smaller than {@code radix}. If any of these conditions does not hold, 0
2401     * is returned.
2402     *
2403     * @param digit
2404     *            the integer value.
2405     * @param radix
2406     *            the radix.
2407     * @return the character which represents the {@code digit} in the
2408     *         {@code radix}.
2409     */
2410    public static char forDigit(int digit, int radix) {
2411        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2412            if (digit >= 0 && digit < radix) {
2413                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2414            }
2415        }
2416        return 0;
2417    }
2418
2419    /**
2420     * Returns the numeric value of the specified Unicode character.
2421     * See {@link #getNumericValue(int)}.
2422     *
2423     * @param c the character
2424     * @return a non-negative numeric integer value if a numeric value for
2425     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2426     *         -2 if the numeric value can not be represented as an integer.
2427     */
2428    public static int getNumericValue(char c) {
2429        return getNumericValue((int) c);
2430    }
2431
2432    /**
2433     * Gets the numeric value of the specified Unicode code point. For example,
2434     * the code point '\u216B' stands for the Roman number XII, which has the
2435     * numeric value 12.
2436     *
2437     * <p>There are two points of divergence between this method and the Unicode
2438     * specification. This method treats the letters a-z (in both upper and lower
2439     * cases, and their full-width variants) as numbers from 10 to 35. The
2440     * Unicode specification also supports the idea of code points with non-integer
2441     * numeric values; this method does not (except to the extent of returning -2
2442     * for such code points).
2443     *
2444     * @param codePoint the code point
2445     * @return a non-negative numeric integer value if a numeric value for
2446     *         {@code codePoint} exists, -1 if there is no numeric value for
2447     *         {@code codePoint}, -2 if the numeric value can not be
2448     *         represented with an integer.
2449     */
2450    public static int getNumericValue(int codePoint) {
2451        // This is both an optimization and papers over differences between Java and ICU.
2452        if (codePoint < 128) {
2453            if (codePoint >= '0' && codePoint <= '9') {
2454                return codePoint - '0';
2455            }
2456            if (codePoint >= 'a' && codePoint <= 'z') {
2457                return codePoint - ('a' - 10);
2458            }
2459            if (codePoint >= 'A' && codePoint <= 'Z') {
2460                return codePoint - ('A' - 10);
2461            }
2462            return -1;
2463        }
2464        // Full-width uppercase A-Z.
2465        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2466            return codePoint - 0xff17;
2467        }
2468        // Full-width lowercase a-z.
2469        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2470            return codePoint - 0xff37;
2471        }
2472        return getNumericValueImpl(codePoint);
2473    }
2474
    // Native lookup backing getNumericValue(int). Only reached for code points >= 128
    // that are not full-width Latin letters; implemented outside this file.
    private static native int getNumericValueImpl(int codePoint);
2476
2477    /**
2478     * Gets the general Unicode category of the specified character.
2479     *
2480     * @param c
2481     *            the character to get the category of.
2482     * @return the Unicode category of {@code c}.
2483     */
2484    public static int getType(char c) {
2485        return getType((int) c);
2486    }
2487
2488    /**
2489     * Gets the general Unicode category of the specified code point.
2490     *
2491     * @param codePoint
2492     *            the Unicode code point to get the category of.
2493     * @return the Unicode category of {@code codePoint}.
2494     */
2495    public static int getType(int codePoint) {
2496        int type = getTypeImpl(codePoint);
2497        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2498        if (type <= Character.FORMAT) {
2499            return type;
2500        }
2501        return (type + 1);
2502    }
2503
    // Native category lookup backing getType(int). Its raw result is renumbered by the
    // caller for values above FORMAT; implemented outside this file.
    private static native int getTypeImpl(int codePoint);
2505
2506    /**
2507     * Gets the Unicode directionality of the specified character.
2508     *
2509     * @param c
2510     *            the character to get the directionality of.
2511     * @return the Unicode directionality of {@code c}.
2512     */
2513    public static byte getDirectionality(char c) {
2514        return getDirectionality((int)c);
2515    }
2516
2517    /**
2518     * Gets the Unicode directionality of the specified character.
2519     *
2520     * @param codePoint
2521     *            the Unicode code point to get the directionality of.
2522     * @return the Unicode directionality of {@code codePoint}.
2523     */
2524    public static byte getDirectionality(int codePoint) {
2525        if (getType(codePoint) == Character.UNASSIGNED) {
2526            return Character.DIRECTIONALITY_UNDEFINED;
2527        }
2528
2529        byte directionality = getDirectionalityImpl(codePoint);
2530        if (directionality == -1) {
2531            return -1;
2532        }
2533        return DIRECTIONALITY[directionality];
2534    }
2535
    // Native lookup backing getDirectionality(int). The caller treats the result as an index
    // into DIRECTIONALITY, or as -1 which it returns as-is; implemented outside this file.
    private static native byte getDirectionalityImpl(int codePoint);
2537
2538    /**
2539     * Indicates whether the specified character is mirrored.
2540     *
2541     * @param c
2542     *            the character to check.
2543     * @return {@code true} if {@code c} is mirrored; {@code false}
2544     *         otherwise.
2545     */
2546    public static boolean isMirrored(char c) {
2547        return isMirrored((int) c);
2548    }
2549
2550    /**
2551     * Indicates whether the specified code point is mirrored.
2552     *
2553     * @param codePoint
2554     *            the code point to check.
2555     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2556     *         otherwise.
2557     */
2558    public static boolean isMirrored(int codePoint) {
2559        return isMirroredImpl(codePoint);
2560    }
2561
    // Native check backing isMirrored(int), which forwards every code point unfiltered;
    // implemented outside this file.
    private static native boolean isMirroredImpl(int codePoint);
2563
2564    @Override
2565    public int hashCode() {
2566        return value;
2567    }
2568
2569    /**
2570     * Indicates whether the specified character is defined in the Unicode
2571     * specification.
2572     *
2573     * @param c
2574     *            the character to check.
2575     * @return {@code true} if the general Unicode category of the character is
2576     *         not {@code UNASSIGNED}; {@code false} otherwise.
2577     */
2578    public static boolean isDefined(char c) {
2579        return isDefinedImpl(c);
2580    }
2581
2582    /**
2583     * Indicates whether the specified code point is defined in the Unicode
2584     * specification.
2585     *
2586     * @param codePoint
2587     *            the code point to check.
2588     * @return {@code true} if the general Unicode category of the code point is
2589     *         not {@code UNASSIGNED}; {@code false} otherwise.
2590     */
2591    public static boolean isDefined(int codePoint) {
2592        return isDefinedImpl(codePoint);
2593    }
2594
    // Native check called directly by both isDefined overloads with no Java-side
    // filtering; implemented outside this file.
    private static native boolean isDefinedImpl(int codePoint);
2596
2597    /**
2598     * Indicates whether the specified character is a digit.
2599     *
2600     * @param c
2601     *            the character to check.
2602     * @return {@code true} if {@code c} is a digit; {@code false}
2603     *         otherwise.
2604     */
2605    public static boolean isDigit(char c) {
2606        return isDigit((int) c);
2607    }
2608
2609    /**
2610     * Indicates whether the specified code point is a digit.
2611     *
2612     * @param codePoint
2613     *            the code point to check.
2614     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2615     *         otherwise.
2616     */
2617    public static boolean isDigit(int codePoint) {
2618        // Optimized case for ASCII
2619        if ('0' <= codePoint && codePoint <= '9') {
2620            return true;
2621        }
2622        if (codePoint < 1632) {
2623            return false;
2624        }
2625        return isDigitImpl(codePoint);
2626    }
2627
    // Native check backing isDigit(int). Only reached for code points >= 1632;
    // implemented outside this file.
    private static native boolean isDigitImpl(int codePoint);
2629
2630    /**
2631     * Indicates whether the specified character is ignorable in a Java or
2632     * Unicode identifier.
2633     *
2634     * @param c
2635     *            the character to check.
2636     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2637     */
2638    public static boolean isIdentifierIgnorable(char c) {
2639        return isIdentifierIgnorable((int) c);
2640    }
2641
2642    /**
2643     * Indicates whether the specified code point is ignorable in a Java or
2644     * Unicode identifier.
2645     *
2646     * @param codePoint
2647     *            the code point to check.
2648     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2649     *         otherwise.
2650     */
2651    public static boolean isIdentifierIgnorable(int codePoint) {
2652        // This is both an optimization and papers over differences between Java and ICU.
2653        if (codePoint < 0x600) {
2654            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2655                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2656        }
2657        return isIdentifierIgnorableImpl(codePoint);
2658    }
2659
    // Native check backing isIdentifierIgnorable(int). Only reached for code points
    // >= 0x600; implemented outside this file.
    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2661
2662    /**
2663     * Indicates whether the specified character is an ISO control character.
2664     *
2665     * @param c
2666     *            the character to check.
2667     * @return {@code true} if {@code c} is an ISO control character;
2668     *         {@code false} otherwise.
2669     */
2670    public static boolean isISOControl(char c) {
2671        return isISOControl((int) c);
2672    }
2673
2674    /**
2675     * Indicates whether the specified code point is an ISO control character.
2676     *
2677     * @param c
2678     *            the code point to check.
2679     * @return {@code true} if {@code c} is an ISO control character;
2680     *         {@code false} otherwise.
2681     */
2682    public static boolean isISOControl(int c) {
2683        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2684    }
2685
2686    /**
2687     * Indicates whether the specified character is a valid part of a Java
2688     * identifier other than the first character.
2689     *
2690     * @param c
2691     *            the character to check.
2692     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2693     *         {@code false} otherwise.
2694     */
2695    public static boolean isJavaIdentifierPart(char c) {
2696        return isJavaIdentifierPart((int) c);
2697    }
2698
2699    /**
2700     * Indicates whether the specified code point is a valid part of a Java
2701     * identifier other than the first character.
2702     *
2703     * @param codePoint
2704     *            the code point to check.
2705     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2706     *         {@code false} otherwise.
2707     */
2708    public static boolean isJavaIdentifierPart(int codePoint) {
2709        // Use precomputed bitmasks to optimize the ASCII range.
2710        if (codePoint < 64) {
2711            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2712        } else if (codePoint < 128) {
2713            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2714        }
2715        int type = getType(codePoint);
2716        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2717                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2718                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2719                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2720                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2721                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2722    }
2723
2724    /**
2725     * Indicates whether the specified character is a valid first character for
2726     * a Java identifier.
2727     *
2728     * @param c
2729     *            the character to check.
2730     * @return {@code true} if {@code c} is a valid first character of a Java
2731     *         identifier; {@code false} otherwise.
2732     */
2733    public static boolean isJavaIdentifierStart(char c) {
2734        return isJavaIdentifierStart((int) c);
2735    }
2736
2737    /**
2738     * Indicates whether the specified code point is a valid first character for
2739     * a Java identifier.
2740     *
2741     * @param codePoint
2742     *            the code point to check.
2743     * @return {@code true} if {@code codePoint} is a valid start of a Java
2744     *         identifier; {@code false} otherwise.
2745     */
2746    public static boolean isJavaIdentifierStart(int codePoint) {
2747        // Use precomputed bitmasks to optimize the ASCII range.
2748        if (codePoint < 64) {
2749            return (codePoint == '$'); // There's only one character in this range.
2750        } else if (codePoint < 128) {
2751            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2752        }
2753        int type = getType(codePoint);
2754        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2755                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2756    }
2757
2758    /**
2759     * Indicates whether the specified character is a Java letter.
2760     *
2761     * @param c
2762     *            the character to check.
2763     * @return {@code true} if {@code c} is a Java letter; {@code false}
2764     *         otherwise.
2765     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2766     */
2767    @Deprecated
2768    public static boolean isJavaLetter(char c) {
2769        return isJavaIdentifierStart(c);
2770    }
2771
2772    /**
2773     * Indicates whether the specified character is a Java letter or digit
2774     * character.
2775     *
2776     * @param c
2777     *            the character to check.
2778     * @return {@code true} if {@code c} is a Java letter or digit;
2779     *         {@code false} otherwise.
2780     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2781     */
2782    @Deprecated
2783    public static boolean isJavaLetterOrDigit(char c) {
2784        return isJavaIdentifierPart(c);
2785    }
2786
2787    /**
2788     * Indicates whether the specified character is a letter.
2789     *
2790     * @param c
2791     *            the character to check.
2792     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2793     */
2794    public static boolean isLetter(char c) {
2795        return isLetter((int) c);
2796    }
2797
2798    /**
2799     * Indicates whether the specified code point is a letter.
2800     *
2801     * @param codePoint
2802     *            the code point to check.
2803     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2804     *         otherwise.
2805     */
2806    public static boolean isLetter(int codePoint) {
2807        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2808            return true;
2809        }
2810        if (codePoint < 128) {
2811            return false;
2812        }
2813        return isLetterImpl(codePoint);
2814    }
2815
    // Native check backing isLetter(int). Only reached for code points >= 128;
    // implemented outside this file.
    private static native boolean isLetterImpl(int codePoint);
2817
2818    /**
2819     * Indicates whether the specified character is a letter or a digit.
2820     *
2821     * @param c
2822     *            the character to check.
2823     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2824     *         otherwise.
2825     */
2826    public static boolean isLetterOrDigit(char c) {
2827        return isLetterOrDigit((int) c);
2828    }
2829
2830    /**
2831     * Indicates whether the specified code point is a letter or a digit.
2832     *
2833     * @param codePoint
2834     *            the code point to check.
2835     * @return {@code true} if {@code codePoint} is a letter or a digit;
2836     *         {@code false} otherwise.
2837     */
2838    public static boolean isLetterOrDigit(int codePoint) {
2839        // Optimized case for ASCII
2840        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2841            return true;
2842        }
2843        if ('0' <= codePoint && codePoint <= '9') {
2844            return true;
2845        }
2846        if (codePoint < 128) {
2847            return false;
2848        }
2849        return isLetterOrDigitImpl(codePoint);
2850    }
2851
    // Native check backing isLetterOrDigit(int). Only reached for code points >= 128;
    // implemented outside this file.
    private static native boolean isLetterOrDigitImpl(int codePoint);
2853
2854    /**
2855     * Indicates whether the specified character is a lower case letter.
2856     *
2857     * @param c
2858     *            the character to check.
2859     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2860     *         otherwise.
2861     */
2862    public static boolean isLowerCase(char c) {
2863        return isLowerCase((int) c);
2864    }
2865
2866    /**
2867     * Indicates whether the specified code point is a lower case letter.
2868     *
2869     * @param codePoint
2870     *            the code point to check.
2871     * @return {@code true} if {@code codePoint} is a lower case letter;
2872     *         {@code false} otherwise.
2873     */
2874    public static boolean isLowerCase(int codePoint) {
2875        // Optimized case for ASCII
2876        if ('a' <= codePoint && codePoint <= 'z') {
2877            return true;
2878        }
2879        if (codePoint < 128) {
2880            return false;
2881        }
2882        return isLowerCaseImpl(codePoint);
2883    }
2884
    // Native check backing isLowerCase(int). Only reached for code points >= 128;
    // implemented outside this file.
    private static native boolean isLowerCaseImpl(int codePoint);
2886
2887    /**
2888     * Indicates whether the specified character is a Java space.
2889     *
2890     * @param c
2891     *            the character to check.
2892     * @return {@code true} if {@code c} is a Java space; {@code false}
2893     *         otherwise.
2894     * @deprecated Use {@link #isWhitespace(char)}
2895     */
2896    @Deprecated
2897    public static boolean isSpace(char c) {
2898        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2899    }
2900
2901    /**
2902     * Indicates whether the specified character is a Unicode space character.
2903     * That is, if it is a member of one of the Unicode categories Space
2904     * Separator, Line Separator, or Paragraph Separator.
2905     *
2906     * @param c
2907     *            the character to check.
2908     * @return {@code true} if {@code c} is a Unicode space character,
2909     *         {@code false} otherwise.
2910     */
2911    public static boolean isSpaceChar(char c) {
2912        return isSpaceChar((int) c);
2913    }
2914
2915    /**
2916     * Indicates whether the specified code point is a Unicode space character.
2917     * That is, if it is a member of one of the Unicode categories Space
2918     * Separator, Line Separator, or Paragraph Separator.
2919     *
2920     * @param codePoint
2921     *            the code point to check.
2922     * @return {@code true} if {@code codePoint} is a Unicode space character,
2923     *         {@code false} otherwise.
2924     */
2925    public static boolean isSpaceChar(int codePoint) {
2926        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
2927            return true;
2928        }
2929        if (codePoint < 0x2000) {
2930            return false;
2931        }
2932        if (codePoint <= 0xffff) {
2933            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
2934                    codePoint == 0x202f || codePoint == 0x3000;
2935        }
2936        return isSpaceCharImpl(codePoint);
2937    }
2938
    // Native check backing isSpaceChar(int). Only reached for code points above 0xffff;
    // implemented outside this file.
    private static native boolean isSpaceCharImpl(int codePoint);
2940
2941    /**
2942     * Indicates whether the specified character is a titlecase character.
2943     *
2944     * @param c
2945     *            the character to check.
2946     * @return {@code true} if {@code c} is a titlecase character, {@code false}
2947     *         otherwise.
2948     */
2949    public static boolean isTitleCase(char c) {
2950        return isTitleCaseImpl(c);
2951    }
2952
2953    /**
2954     * Indicates whether the specified code point is a titlecase character.
2955     *
2956     * @param codePoint
2957     *            the code point to check.
2958     * @return {@code true} if {@code codePoint} is a titlecase character,
2959     *         {@code false} otherwise.
2960     */
2961    public static boolean isTitleCase(int codePoint) {
2962        return isTitleCaseImpl(codePoint);
2963    }
2964
    // Native check called directly by both isTitleCase overloads with no Java-side
    // filtering; implemented outside this file.
    private static native boolean isTitleCaseImpl(int codePoint);
2966
2967    /**
2968     * Indicates whether the specified character is valid as part of a Unicode
2969     * identifier other than the first character.
2970     *
2971     * @param c
2972     *            the character to check.
2973     * @return {@code true} if {@code c} is valid as part of a Unicode
2974     *         identifier; {@code false} otherwise.
2975     */
2976    public static boolean isUnicodeIdentifierPart(char c) {
2977        return isUnicodeIdentifierPartImpl(c);
2978    }
2979
2980    /**
2981     * Indicates whether the specified code point is valid as part of a Unicode
2982     * identifier other than the first character.
2983     *
2984     * @param codePoint
2985     *            the code point to check.
2986     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
2987     *         identifier; {@code false} otherwise.
2988     */
2989    public static boolean isUnicodeIdentifierPart(int codePoint) {
2990        return isUnicodeIdentifierPartImpl(codePoint);
2991    }
2992
    // Native check called directly by both isUnicodeIdentifierPart overloads with no
    // Java-side filtering; implemented outside this file.
    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
2994
2995    /**
2996     * Indicates whether the specified character is a valid initial character
2997     * for a Unicode identifier.
2998     *
2999     * @param c
3000     *            the character to check.
3001     * @return {@code true} if {@code c} is a valid first character for a
3002     *         Unicode identifier; {@code false} otherwise.
3003     */
3004    public static boolean isUnicodeIdentifierStart(char c) {
3005        return isUnicodeIdentifierStartImpl(c);
3006    }
3007
3008    /**
3009     * Indicates whether the specified code point is a valid initial character
3010     * for a Unicode identifier.
3011     *
3012     * @param codePoint
3013     *            the code point to check.
3014     * @return {@code true} if {@code codePoint} is a valid first character for
3015     *         a Unicode identifier; {@code false} otherwise.
3016     */
3017    public static boolean isUnicodeIdentifierStart(int codePoint) {
3018        return isUnicodeIdentifierStartImpl(codePoint);
3019    }
3020
    // Native check called directly by both isUnicodeIdentifierStart overloads with no
    // Java-side filtering; implemented outside this file.
    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3022
3023    /**
3024     * Indicates whether the specified character is an upper case letter.
3025     *
3026     * @param c
3027     *            the character to check.
3028     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3029     *         otherwise.
3030     */
3031    public static boolean isUpperCase(char c) {
3032        return isUpperCase((int) c);
3033    }
3034
3035    /**
3036     * Indicates whether the specified code point is an upper case letter.
3037     *
3038     * @param codePoint
3039     *            the code point to check.
3040     * @return {@code true} if {@code codePoint} is a upper case letter;
3041     *         {@code false} otherwise.
3042     */
3043    public static boolean isUpperCase(int codePoint) {
3044        // Optimized case for ASCII
3045        if ('A' <= codePoint && codePoint <= 'Z') {
3046            return true;
3047        }
3048        if (codePoint < 128) {
3049            return false;
3050        }
3051        return isUpperCaseImpl(codePoint);
3052    }
3053
    // Native check backing isUpperCase(int). Only reached for code points >= 128;
    // implemented outside this file.
    private static native boolean isUpperCaseImpl(int codePoint);
3055
3056    /**
3057     * Indicates whether the specified character is a whitespace character in
3058     * Java.
3059     *
3060     * @param c
3061     *            the character to check.
3062     * @return {@code true} if the supplied {@code c} is a whitespace character
3063     *         in Java; {@code false} otherwise.
3064     */
3065    public static boolean isWhitespace(char c) {
3066        return isWhitespace((int) c);
3067    }
3068
3069    /**
3070     * Indicates whether the specified code point is a whitespace character in
3071     * Java.
3072     *
3073     * @param codePoint
3074     *            the code point to check.
3075     * @return {@code true} if the supplied {@code c} is a whitespace character
3076     *         in Java; {@code false} otherwise.
3077     */
3078    public static boolean isWhitespace(int codePoint) {
3079        // This is both an optimization and papers over differences between Java and ICU.
3080        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3081            return true;
3082        }
3083        if (codePoint == 0x1680) {
3084            return true;
3085        }
3086        if (codePoint < 0x2000 || codePoint == 0x2007) {
3087            return false;
3088        }
3089        if (codePoint <= 0xffff) {
3090            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3091                    codePoint == 0x3000;
3092        }
3093        return isWhitespaceImpl(codePoint);
3094    }
3095
    // Native check backing isWhitespace(int). Only reached for code points above 0xffff;
    // implemented outside this file.
    private static native boolean isWhitespaceImpl(int codePoint);
3097
3098    /**
3099     * Reverses the order of the first and second byte in the specified
3100     * character.
3101     *
3102     * @param c
3103     *            the character to reverse.
3104     * @return the character with reordered bytes.
3105     */
3106    public static char reverseBytes(char c) {
3107        return (char)((c<<8) | (c>>8));
3108    }
3109
3110    /**
3111     * Returns the lower case equivalent for the specified character if the
3112     * character is an upper case letter. Otherwise, the specified character is
3113     * returned unchanged.
3114     *
3115     * @param c
3116     *            the character
3117     * @return if {@code c} is an upper case character then its lower case
3118     *         counterpart, otherwise just {@code c}.
3119     */
3120    public static char toLowerCase(char c) {
3121        return (char) toLowerCase((int) c);
3122    }
3123
3124    /**
3125     * Returns the lower case equivalent for the specified code point if it is
3126     * an upper case letter. Otherwise, the specified code point is returned
3127     * unchanged.
3128     *
3129     * @param codePoint
3130     *            the code point to check.
3131     * @return if {@code codePoint} is an upper case character then its lower
3132     *         case counterpart, otherwise just {@code codePoint}.
3133     */
3134    public static int toLowerCase(int codePoint) {
3135        // Optimized case for ASCII
3136        if ('A' <= codePoint && codePoint <= 'Z') {
3137            return (char) (codePoint + ('a' - 'A'));
3138        }
3139        if (codePoint < 192) {
3140            return codePoint;
3141        }
3142        return toLowerCaseImpl(codePoint);
3143    }
3144
    // Native mapping backing toLowerCase(int). Only reached for code points >= 192;
    // implemented outside this file.
    private static native int toLowerCaseImpl(int codePoint);
3146
3147    @Override
3148    public String toString() {
3149        return String.valueOf(value);
3150    }
3151
3152    /**
3153     * Converts the specified character to its string representation.
3154     *
3155     * @param value
3156     *            the character to convert.
3157     * @return the character converted to a string.
3158     */
3159    public static String toString(char value) {
3160        return String.valueOf(value);
3161    }
3162
3163    /**
3164     * Returns the title case equivalent for the specified character if it
3165     * exists. Otherwise, the specified character is returned unchanged.
3166     *
3167     * @param c
3168     *            the character to convert.
3169     * @return the title case equivalent of {@code c} if it exists, otherwise
3170     *         {@code c}.
3171     */
3172    public static char toTitleCase(char c) {
3173        return (char) toTitleCaseImpl(c);
3174    }
3175
3176    /**
3177     * Returns the title case equivalent for the specified code point if it
3178     * exists. Otherwise, the specified code point is returned unchanged.
3179     *
3180     * @param codePoint
3181     *            the code point to convert.
3182     * @return the title case equivalent of {@code codePoint} if it exists,
3183     *         otherwise {@code codePoint}.
3184     */
3185    public static int toTitleCase(int codePoint) {
3186        return toTitleCaseImpl(codePoint);
3187    }
3188
    // Native mapping called directly by both toTitleCase overloads with no Java-side
    // filtering; implemented outside this file.
    private static native int toTitleCaseImpl(int codePoint);
3190
3191    /**
3192     * Returns the upper case equivalent for the specified character if the
3193     * character is a lower case letter. Otherwise, the specified character is
3194     * returned unchanged.
3195     *
3196     * @param c
3197     *            the character to convert.
3198     * @return if {@code c} is a lower case character then its upper case
3199     *         counterpart, otherwise just {@code c}.
3200     */
3201    public static char toUpperCase(char c) {
3202        return (char) toUpperCase((int) c);
3203    }
3204
3205    /**
3206     * Returns the upper case equivalent for the specified code point if the
3207     * code point is a lower case letter. Otherwise, the specified code point is
3208     * returned unchanged.
3209     *
3210     * @param codePoint
3211     *            the code point to convert.
3212     * @return if {@code codePoint} is a lower case character then its upper
3213     *         case counterpart, otherwise just {@code codePoint}.
3214     */
3215    public static int toUpperCase(int codePoint) {
3216        // Optimized case for ASCII
3217        if ('a' <= codePoint && codePoint <= 'z') {
3218            return (char) (codePoint - ('a' - 'A'));
3219        }
3220        if (codePoint < 181) {
3221            return codePoint;
3222        }
3223        return toUpperCaseImpl(codePoint);
3224    }
3225
    // Native mapping backing toUpperCase(int). Only reached for code points >= 181;
    // implemented outside this file.
    private static native int toUpperCaseImpl(int codePoint);
3227}
3228