// Character.java revision fed2ee9e249b952c76f11110c41b33c4829aa56f
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21import java.util.Arrays;
22
23/**
24 * The wrapper for the primitive type {@code char}. This class also provides a
25 * number of utility methods for working with characters.
26 *
27 * <p>Character data is kept up to date as Unicode evolves.
28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29 * the {@code Locale} documentation for details of the Unicode versions implemented by current
30 * and historical Android releases.
31 *
32 * <p>The Unicode specification, character tables, and other information are available at
33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34 *
35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39 * encoding and {@code char} pairs are used to represent code points in the
 * supplementary range. A pair of {@code char} values that represents a
 * supplementary character is made up of a <i>high surrogate</i> with a value
 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
 * 0xDC00 to 0xDFFF.
44 * <p>
45 * On the Java platform a {@code char} value represents either a single BMP code
46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47 * is used to represent all Unicode code points.
48 *
49 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51 * grouped semantically to provide a convenient overview. This table is also useful in
52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53 * <span class="datatable">
54 * <style type="text/css">
55 * .datatable td { padding-right: 20px; }
56 * </style>
57 * <p><table>
58 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63 * <tr> <td><br></td> </tr>
64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69 * <tr> <td><br></td> </tr>
70 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73 * <tr> <td><br></td> </tr>
74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77 * <tr> <td><br></td> </tr>
78 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85 * <tr> <td><br></td> </tr>
86 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90 * <tr> <td><br></td> </tr>
91 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94 * </table>
95 * </span>
96 *
97 * @since 1.0
98 */
99@FindBugsSuppressWarnings("DM_NUMBER_CTOR")
100public final class Character implements Serializable, Comparable<Character> {
101    private static final long serialVersionUID = 3786198910865385080L;
102
103    private final char value;
104
105    /**
106     * The minimum {@code Character} value.
107     */
108    public static final char MIN_VALUE = '\u0000';
109
110    /**
111     * The maximum {@code Character} value.
112     */
113    public static final char MAX_VALUE = '\uffff';
114
115    /**
116     * The minimum radix used for conversions between characters and integers.
117     */
118    public static final int MIN_RADIX = 2;
119
120    /**
121     * The maximum radix used for conversions between characters and integers.
122     */
123    public static final int MAX_RADIX = 36;
124
125    /**
126     * The {@link Class} object that represents the primitive type {@code char}.
127     */
128    @SuppressWarnings("unchecked")
129    public static final Class<Character> TYPE
130            = (Class<Character>) char[].class.getComponentType();
131    // Note: Character.TYPE can't be set to "char.class", since *that* is
132    // defined to be "java.lang.Character.TYPE";
133
134    /**
135     * Unicode category constant Cn.
136     */
137    public static final byte UNASSIGNED = 0;
138
139    /**
140     * Unicode category constant Lu.
141     */
142    public static final byte UPPERCASE_LETTER = 1;
143
144    /**
145     * Unicode category constant Ll.
146     */
147    public static final byte LOWERCASE_LETTER = 2;
148
149    /**
150     * Unicode category constant Lt.
151     */
152    public static final byte TITLECASE_LETTER = 3;
153
154    /**
155     * Unicode category constant Lm.
156     */
157    public static final byte MODIFIER_LETTER = 4;
158
159    /**
160     * Unicode category constant Lo.
161     */
162    public static final byte OTHER_LETTER = 5;
163
164    /**
165     * Unicode category constant Mn.
166     */
167    public static final byte NON_SPACING_MARK = 6;
168
169    /**
170     * Unicode category constant Me.
171     */
172    public static final byte ENCLOSING_MARK = 7;
173
174    /**
175     * Unicode category constant Mc.
176     */
177    public static final byte COMBINING_SPACING_MARK = 8;
178
179    /**
180     * Unicode category constant Nd.
181     */
182    public static final byte DECIMAL_DIGIT_NUMBER = 9;
183
184    /**
185     * Unicode category constant Nl.
186     */
187    public static final byte LETTER_NUMBER = 10;
188
189    /**
190     * Unicode category constant No.
191     */
192    public static final byte OTHER_NUMBER = 11;
193
194    /**
195     * Unicode category constant Zs.
196     */
197    public static final byte SPACE_SEPARATOR = 12;
198
199    /**
200     * Unicode category constant Zl.
201     */
202    public static final byte LINE_SEPARATOR = 13;
203
204    /**
205     * Unicode category constant Zp.
206     */
207    public static final byte PARAGRAPH_SEPARATOR = 14;
208
209    /**
210     * Unicode category constant Cc.
211     */
212    public static final byte CONTROL = 15;
213
214    /**
215     * Unicode category constant Cf.
216     */
217    public static final byte FORMAT = 16;
218
219    /**
220     * Unicode category constant Co.
221     */
222    public static final byte PRIVATE_USE = 18;
223
224    /**
225     * Unicode category constant Cs.
226     */
227    public static final byte SURROGATE = 19;
228
229    /**
230     * Unicode category constant Pd.
231     */
232    public static final byte DASH_PUNCTUATION = 20;
233
234    /**
235     * Unicode category constant Ps.
236     */
237    public static final byte START_PUNCTUATION = 21;
238
239    /**
240     * Unicode category constant Pe.
241     */
242    public static final byte END_PUNCTUATION = 22;
243
244    /**
245     * Unicode category constant Pc.
246     */
247    public static final byte CONNECTOR_PUNCTUATION = 23;
248
249    /**
250     * Unicode category constant Po.
251     */
252    public static final byte OTHER_PUNCTUATION = 24;
253
254    /**
255     * Unicode category constant Sm.
256     */
257    public static final byte MATH_SYMBOL = 25;
258
259    /**
260     * Unicode category constant Sc.
261     */
262    public static final byte CURRENCY_SYMBOL = 26;
263
264    /**
265     * Unicode category constant Sk.
266     */
267    public static final byte MODIFIER_SYMBOL = 27;
268
269    /**
270     * Unicode category constant So.
271     */
272    public static final byte OTHER_SYMBOL = 28;
273
274    /**
275     * Unicode category constant Pi.
276     *
277     * @since 1.4
278     */
279    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
280
281    /**
282     * Unicode category constant Pf.
283     *
284     * @since 1.4
285     */
286    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
287
    /**
     * Unicode bidirectional constant for a character whose directionality is
     * undefined.
     *
     * @since 1.4
     */
293    public static final byte DIRECTIONALITY_UNDEFINED = -1;
294
295    /**
296     * Unicode bidirectional constant L.
297     *
298     * @since 1.4
299     */
300    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
301
302    /**
303     * Unicode bidirectional constant R.
304     *
305     * @since 1.4
306     */
307    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
308
309    /**
310     * Unicode bidirectional constant AL.
311     *
312     * @since 1.4
313     */
314    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
315
316    /**
317     * Unicode bidirectional constant EN.
318     *
319     * @since 1.4
320     */
321    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
322
323    /**
324     * Unicode bidirectional constant ES.
325     *
326     * @since 1.4
327     */
328    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
329
330    /**
331     * Unicode bidirectional constant ET.
332     *
333     * @since 1.4
334     */
335    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
336
337    /**
338     * Unicode bidirectional constant AN.
339     *
340     * @since 1.4
341     */
342    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
343
344    /**
345     * Unicode bidirectional constant CS.
346     *
347     * @since 1.4
348     */
349    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
350
351    /**
352     * Unicode bidirectional constant NSM.
353     *
354     * @since 1.4
355     */
356    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
357
358    /**
359     * Unicode bidirectional constant BN.
360     *
361     * @since 1.4
362     */
363    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
364
365    /**
366     * Unicode bidirectional constant B.
367     *
368     * @since 1.4
369     */
370    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
371
372    /**
373     * Unicode bidirectional constant S.
374     *
375     * @since 1.4
376     */
377    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
378
379    /**
380     * Unicode bidirectional constant WS.
381     *
382     * @since 1.4
383     */
384    public static final byte DIRECTIONALITY_WHITESPACE = 12;
385
386    /**
387     * Unicode bidirectional constant ON.
388     *
389     * @since 1.4
390     */
391    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
392
393    /**
394     * Unicode bidirectional constant LRE.
395     *
396     * @since 1.4
397     */
398    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
399
400    /**
401     * Unicode bidirectional constant LRO.
402     *
403     * @since 1.4
404     */
405    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
406
407    /**
408     * Unicode bidirectional constant RLE.
409     *
410     * @since 1.4
411     */
412    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
413
414    /**
415     * Unicode bidirectional constant RLO.
416     *
417     * @since 1.4
418     */
419    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
420
421    /**
422     * Unicode bidirectional constant PDF.
423     *
424     * @since 1.4
425     */
426    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
427
428    /**
429     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
430     * encoding, {@code '\uD800'}.
431     *
432     * @since 1.5
433     */
434    public static final char MIN_HIGH_SURROGATE = '\uD800';
435
436    /**
437     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
438     * encoding, {@code '\uDBFF'}.
439     *
440     * @since 1.5
441     */
442    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
443
444    /**
445     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
446     * encoding, {@code '\uDC00'}.
447     *
448     * @since 1.5
449     */
450    public static final char MIN_LOW_SURROGATE = '\uDC00';
451
452    /**
453     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
454     * encoding, {@code '\uDFFF'}.
455     *
456     * @since 1.5
457     */
458    public static final char MAX_LOW_SURROGATE = '\uDFFF';
459
460    /**
461     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
462     *
463     * @since 1.5
464     */
465    public static final char MIN_SURROGATE = '\uD800';
466
467    /**
468     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
469     *
470     * @since 1.5
471     */
472    public static final char MAX_SURROGATE = '\uDFFF';
473
474    /**
475     * The minimum value of a supplementary code point, {@code U+010000}.
476     *
477     * @since 1.5
478     */
479    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
480
481    /**
482     * The minimum code point value, {@code U+0000}.
483     *
484     * @since 1.5
485     */
486    public static final int MIN_CODE_POINT = 0x000000;
487
488    /**
489     * The maximum code point value, {@code U+10FFFF}.
490     *
491     * @since 1.5
492     */
493    public static final int MAX_CODE_POINT = 0x10FFFF;
494
    /**
     * The number of bits required to represent a {@code Character} value in
     * unsigned binary form.
     *
     * @since 1.5
     */
501    public static final int SIZE = 16;
502
503    private static final byte[] DIRECTIONALITY = new byte[] {
504            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
505            DIRECTIONALITY_EUROPEAN_NUMBER,
506            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
507            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
508            DIRECTIONALITY_ARABIC_NUMBER,
509            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
510            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
511            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
512            DIRECTIONALITY_OTHER_NEUTRALS,
513            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
514            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
515            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
516            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
517            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
518            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
519            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
520
521    /*
522     * Represents a subset of the Unicode character set.
523     */
524    public static class Subset {
525        private final String name;
526
527        /**
528         * Constructs a new {@code Subset}.
529         */
530        protected Subset(String name) {
531            if (name == null) {
532                throw new NullPointerException("name == null");
533            }
534            this.name = name;
535        }
536
537        /**
538         * Compares this character subset for identity with the specified object.
539         */
540        @Override public final boolean equals(Object object) {
541            return object == this;
542        }
543
544        /**
545         * Returns this subset's hash code, which is the hash code computed by
546         *         {@link java.lang.Object#hashCode()}.
547         */
548        @Override public final int hashCode() {
549            return super.hashCode();
550        }
551
552        /**
553         * Returns this subset's name.
554         */
555        @Override public final String toString() {
556            return name;
557        }
558    }
559
560    /**
561     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
562     * specification.
563     *
564     * @since 1.2
565     */
566    public static final class UnicodeBlock extends Subset {
567        /**
568         * The &quot;Surrogates Area&quot; Unicode Block.
569         *
570         * @deprecated As of Java 5, this block has been replaced by
571         *             {@link #HIGH_SURROGATES},
572         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
573         *             {@link #LOW_SURROGATES}.
574         */
575        @Deprecated
576        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA");
577        /**
578         * The &quot;Basic Latin&quot; Unicode Block.
579         *
580         * @since 1.2
581         */
582        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN");
583        /**
584         * The &quot;Latin-1 Supplement&quot; Unicode Block.
585         *
586         * @since 1.2
587         */
588        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT");
589        /**
590         * The &quot;Latin Extended-A&quot; Unicode Block.
591         *
592         * @since 1.2
593         */
594        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A");
595        /**
596         * The &quot;Latin Extended-B&quot; Unicode Block.
597         *
598         * @since 1.2
599         */
600        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B");
601        /**
602         * The &quot;IPA Extensions&quot; Unicode Block.
603         *
604         * @since 1.2
605         */
606        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS");
607        /**
608         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
609         *
610         * @since 1.2
611         */
612        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS");
613        /**
614         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
615         *
616         * @since 1.2
617         */
618        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS");
619        /**
620         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
621         * to as &quot;Greek&quot;.
622         *
623         * @since 1.2
624         */
625        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK");
626        /**
627         * The &quot;Cyrillic&quot; Unicode Block.
628         *
629         * @since 1.2
630         */
631        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC");
632        /**
633         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
634         * referred to as &quot;Cyrillic Supplementary&quot;.
635         *
636         * @since 1.5
637         */
638        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY");
639        /**
640         * The &quot;Armenian&quot; Unicode Block.
641         *
642         * @since 1.2
643         */
644        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN");
645        /**
646         * The &quot;Hebrew&quot; Unicode Block.
647         *
648         * @since 1.2
649         */
650        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW");
651        /**
652         * The &quot;Arabic&quot; Unicode Block.
653         *
654         * @since 1.2
655         */
656        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC");
657        /**
658         * The &quot;Syriac&quot; Unicode Block.
659         *
660         * @since 1.4
661         */
662        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC");
663        /**
664         * The &quot;Thaana&quot; Unicode Block.
665         *
666         * @since 1.4
667         */
668        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA");
669        /**
670         * The &quot;Devanagari&quot; Unicode Block.
671         *
672         * @since 1.2
673         */
674        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI");
675        /**
676         * The &quot;Bengali&quot; Unicode Block.
677         *
678         * @since 1.2
679         */
680        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI");
681        /**
682         * The &quot;Gurmukhi&quot; Unicode Block.
683         *
684         * @since 1.2
685         */
686        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI");
687        /**
688         * The &quot;Gujarati&quot; Unicode Block.
689         *
690         * @since 1.2
691         */
692        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI");
693        /**
694         * The &quot;Oriya&quot; Unicode Block.
695         *
696         * @since 1.2
697         */
698        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA");
699        /**
700         * The &quot;Tamil&quot; Unicode Block.
701         *
702         * @since 1.2
703         */
704        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL");
705        /**
706         * The &quot;Telugu&quot; Unicode Block.
707         *
708         * @since 1.2
709         */
710        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU");
711        /**
712         * The &quot;Kannada&quot; Unicode Block.
713         *
714         * @since 1.2
715         */
716        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA");
717        /**
718         * The &quot;Malayalam&quot; Unicode Block.
719         *
720         * @since 1.2
721         */
722        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM");
723        /**
724         * The &quot;Sinhala&quot; Unicode Block.
725         *
726         * @since 1.4
727         */
728        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA");
729        /**
730         * The &quot;Thai&quot; Unicode Block.
731         *
732         * @since 1.2
733         */
734        public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
735        /**
736         * The &quot;Lao&quot; Unicode Block.
737         *
738         * @since 1.2
739         */
740        public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
741        /**
742         * The &quot;Tibetan&quot; Unicode Block.
743         *
744         * @since 1.2
745         */
746        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN");
747        /**
748         * The &quot;Myanmar&quot; Unicode Block.
749         *
750         * @since 1.4
751         */
752        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR");
753        /**
754         * The &quot;Georgian&quot; Unicode Block.
755         *
756         * @since 1.2
757         */
758        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN");
759        /**
760         * The &quot;Hangul Jamo&quot; Unicode Block.
761         *
762         * @since 1.2
763         */
764        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO");
765        /**
766         * The &quot;Ethiopic&quot; Unicode Block.
767         *
768         * @since 1.4
769         */
770        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC");
771        /**
772         * The &quot;Cherokee&quot; Unicode Block.
773         *
774         * @since 1.4
775         */
776        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE");
777        /**
778         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
779         *
780         * @since 1.4
781         */
782        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
783        /**
784         * The &quot;Ogham&quot; Unicode Block.
785         *
786         * @since 1.4
787         */
788        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM");
789        /**
790         * The &quot;Runic&quot; Unicode Block.
791         *
792         * @since 1.4
793         */
794        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC");
795        /**
796         * The &quot;Tagalog&quot; Unicode Block.
797         *
798         * @since 1.5
799         */
800        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG");
801        /**
802         * The &quot;Hanunoo&quot; Unicode Block.
803         *
804         * @since 1.5
805         */
806        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO");
807        /**
808         * The &quot;Buhid&quot; Unicode Block.
809         *
810         * @since 1.5
811         */
812        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID");
813        /**
814         * The &quot;Tagbanwa&quot; Unicode Block.
815         *
816         * @since 1.5
817         */
818        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA");
819        /**
820         * The &quot;Khmer&quot; Unicode Block.
821         *
822         * @since 1.4
823         */
824        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER");
825        /**
826         * The &quot;Mongolian&quot; Unicode Block.
827         *
828         * @since 1.4
829         */
830        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN");
831        /**
832         * The &quot;Limbu&quot; Unicode Block.
833         *
834         * @since 1.5
835         */
836        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU");
837        /**
838         * The &quot;Tai Le&quot; Unicode Block.
839         *
840         * @since 1.5
841         */
842        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE");
843        /**
844         * The &quot;Khmer Symbols&quot; Unicode Block.
845         *
846         * @since 1.5
847         */
848        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS");
849        /**
850         * The &quot;Phonetic Extensions&quot; Unicode Block.
851         *
852         * @since 1.5
853         */
854        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS");
855        /**
856         * The &quot;Latin Extended Additional&quot; Unicode Block.
857         *
858         * @since 1.2
859         */
860        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL");
861        /**
862         * The &quot;Greek Extended&quot; Unicode Block.
863         *
864         * @since 1.2
865         */
866        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED");
867        /**
868         * The &quot;General Punctuation&quot; Unicode Block.
869         *
870         * @since 1.2
871         */
872        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION");
873        /**
874         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
875         *
876         * @since 1.2
877         */
878        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS");
879        /**
880         * The &quot;Currency Symbols&quot; Unicode Block.
881         *
882         * @since 1.2
883         */
884        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS");
885        /**
886         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
887         * Block. Previously referred to as &quot;Combining Marks for
888         * Symbols&quot;.
889         *
890         * @since 1.2
891         */
892        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS");
893        /**
894         * The &quot;Letterlike Symbols&quot; Unicode Block.
895         *
896         * @since 1.2
897         */
898        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS");
899        /**
900         * The &quot;Number Forms&quot; Unicode Block.
901         *
902         * @since 1.2
903         */
904        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS");
905        /**
906         * The &quot;Arrows&quot; Unicode Block.
907         *
908         * @since 1.2
909         */
910        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS");
911        /**
912         * The &quot;Mathematical Operators&quot; Unicode Block.
913         *
914         * @since 1.2
915         */
916        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS");
917        /**
918         * The &quot;Miscellaneous Technical&quot; Unicode Block.
919         *
920         * @since 1.2
921         */
922        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL");
923        /**
924         * The &quot;Control Pictures&quot; Unicode Block.
925         *
926         * @since 1.2
927         */
928        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES");
929        /**
930         * The &quot;Optical Character Recognition&quot; Unicode Block.
931         *
932         * @since 1.2
933         */
934        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION");
935        /**
936         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
937         *
938         * @since 1.2
939         */
940        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS");
941        /**
942         * The &quot;Box Drawing&quot; Unicode Block.
943         *
944         * @since 1.2
945         */
946        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING");
947        /**
948         * The &quot;Block Elements&quot; Unicode Block.
949         *
950         * @since 1.2
951         */
952        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS");
953        /**
954         * The &quot;Geometric Shapes&quot; Unicode Block.
955         *
956         * @since 1.2
957         */
958        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES");
959        /**
960         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
961         *
962         * @since 1.2
963         */
964        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS");
965        /**
966         * The &quot;Dingbats&quot; Unicode Block.
967         *
968         * @since 1.2
969         */
970        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS");
971        /**
972         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
973         *
974         * @since 1.5
975         */
976        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A");
977        /**
978         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
979         *
980         * @since 1.5
981         */
982        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A");
983        /**
984         * The &quot;Braille Patterns&quot; Unicode Block.
985         *
986         * @since 1.4
987         */
988        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS");
989        /**
990         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
991         *
992         * @since 1.5
993         */
994        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B");
995        /**
996         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
997         *
998         * @since 1.5
999         */
1000        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B");
1001        /**
1002         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1003         *
1004         * @since 1.5
1005         */
1006        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS");
1007        /**
1008         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
1009         *
         * @since 1.5
1011         */
1012        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS");
1013        /**
1014         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1015         *
1016         * @since 1.4
1017         */
1018        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT");
1019        /**
1020         * The &quot;Kangxi Radicals&quot; Unicode Block.
1021         *
1022         * @since 1.4
1023         */
1024        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS");
1025        /**
1026         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1027         *
1028         * @since 1.4
1029         */
1030        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
1031        /**
1032         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1033         *
1034         * @since 1.2
1035         */
1036        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION");
1037        /**
1038         * The &quot;Hiragana&quot; Unicode Block.
1039         *
1040         * @since 1.2
1041         */
1042        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA");
1043        /**
1044         * The &quot;Katakana&quot; Unicode Block.
1045         *
1046         * @since 1.2
1047         */
1048        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA");
1049        /**
1050         * The &quot;Bopomofo&quot; Unicode Block.
1051         *
1052         * @since 1.2
1053         */
1054        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO");
1055        /**
1056         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1057         *
1058         * @since 1.2
1059         */
1060        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO");
1061        /**
1062         * The &quot;Kanbun&quot; Unicode Block.
1063         *
1064         * @since 1.2
1065         */
1066        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN");
1067        /**
1068         * The &quot;Bopomofo Extended&quot; Unicode Block.
1069         *
1070         * @since 1.4
1071         */
1072        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED");
1073        /**
1074         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1075         *
1076         * @since 1.5
1077         */
1078        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS");
1079        /**
1080         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1081         *
1082         * @since 1.2
1083         */
1084        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS");
1085        /**
1086         * The &quot;CJK Compatibility&quot; Unicode Block.
1087         *
1088         * @since 1.2
1089         */
1090        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY");
1091        /**
1092         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1093         *
1094         * @since 1.4
1095         */
1096        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
1097        /**
1098         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1099         *
1100         * @since 1.5
1101         */
1102        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS");
1103        /**
1104         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1105         *
1106         * @since 1.2
1107         */
1108        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS");
1109        /**
1110         * The &quot;Yi Syllables&quot; Unicode Block.
1111         *
1112         * @since 1.4
1113         */
1114        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES");
1115        /**
1116         * The &quot;Yi Radicals&quot; Unicode Block.
1117         *
1118         * @since 1.4
1119         */
1120        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS");
1121        /**
1122         * The &quot;Hangul Syllables&quot; Unicode Block.
1123         *
1124         * @since 1.2
1125         */
1126        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES");
        /**
         * The &quot;High Surrogates&quot; Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         *
         * @since 1.5
         */
1131        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES");
        /**
         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         *
         * @since 1.5
         */
1137        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES");
        /**
         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         *
         * @since 1.5
         */
1142        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES");
1143        /**
1144         * The &quot;Private Use Area&quot; Unicode Block.
1145         *
1146         * @since 1.2
1147         */
1148        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA");
1149        /**
1150         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1151         *
1152         * @since 1.2
1153         */
1154        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS");
1155        /**
1156         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1157         *
1158         * @since 1.2
1159         */
1160        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS");
1161        /**
1162         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1163         *
1164         * @since 1.2
1165         */
1166        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A");
1167        /**
1168         * The &quot;Variation Selectors&quot; Unicode Block.
1169         *
1170         * @since 1.5
1171         */
1172        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS");
1173        /**
1174         * The &quot;Combining Half Marks&quot; Unicode Block.
1175         *
1176         * @since 1.2
1177         */
1178        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS");
1179        /**
1180         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1181         *
1182         * @since 1.2
1183         */
1184        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS");
1185        /**
1186         * The &quot;Small Form Variants&quot; Unicode Block.
1187         *
1188         * @since 1.2
1189         */
1190        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS");
1191        /**
1192         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1193         *
1194         * @since 1.2
1195         */
1196        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B");
1197        /**
1198         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1199         *
1200         * @since 1.2
1201         */
1202        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS");
1203        /**
1204         * The &quot;Specials&quot; Unicode Block.
1205         *
1206         * @since 1.2
1207         */
1208        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS");
1209        /**
1210         * The &quot;Linear B Syllabary&quot; Unicode Block.
1211         *
         * @since 1.5
1213         */
1214        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY");
1215        /**
1216         * The &quot;Linear B Ideograms&quot; Unicode Block.
1217         *
1218         * @since 1.5
1219         */
1220        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS");
1221        /**
1222         * The &quot;Aegean Numbers&quot; Unicode Block.
1223         *
1224         * @since 1.5
1225         */
1226        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS");
1227        /**
1228         * The &quot;Old Italic&quot; Unicode Block.
1229         *
1230         * @since 1.5
1231         */
1232        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC");
1233        /**
1234         * The &quot;Gothic&quot; Unicode Block.
1235         *
1236         * @since 1.5
1237         */
1238        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1239        /**
1240         * The &quot;Ugaritic&quot; Unicode Block.
1241         *
1242         * @since 1.5
1243         */
1244        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1245        /**
1246         * The &quot;Deseret&quot; Unicode Block.
1247         *
1248         * @since 1.5
1249         */
1250        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1251        /**
1252         * The &quot;Shavian&quot; Unicode Block.
1253         *
1254         * @since 1.5
1255         */
1256        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1257        /**
1258         * The &quot;Osmanya&quot; Unicode Block.
1259         *
1260         * @since 1.5
1261         */
1262        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1263        /**
1264         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1265         *
1266         * @since 1.5
1267         */
1268        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY");
1269        /**
1270         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1271         *
1272         * @since 1.5
1273         */
1274        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS");
1275        /**
1276         * The &quot;Musical Symbols&quot; Unicode Block.
1277         *
1278         * @since 1.5
1279         */
1280        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS");
1281        /**
1282         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1283         *
1284         * @since 1.5
1285         */
1286        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS");
1287        /**
1288         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1289         *
1290         * @since 1.5
1291         */
1292        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS");
1293        /**
1294         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1295         *
1296         * @since 1.5
1297         */
1298        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");
1299        /**
1300         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1301         *
1302         * @since 1.5
1303         */
1304        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT");
1305        /**
1306         * The &quot;Tags&quot; Unicode Block.
1307         *
1308         * @since 1.5
1309         */
1310        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1311        /**
1312         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1313         *
1314         * @since 1.5
1315         */
1316        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT");
1317        /**
1318         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1319         *
1320         * @since 1.5
1321         */
1322        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A");
1323        /**
1324         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1325         *
1326         * @since 1.5
1327         */
1328        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B");
1329
1330        /*
1331         * All of the UnicodeBlocks with valid ranges in ascending order.
1332         */
1333        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1334            null,
1335            UnicodeBlock.BASIC_LATIN,
1336            UnicodeBlock.LATIN_1_SUPPLEMENT,
1337            UnicodeBlock.LATIN_EXTENDED_A,
1338            UnicodeBlock.LATIN_EXTENDED_B,
1339            UnicodeBlock.IPA_EXTENSIONS,
1340            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1341            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1342            UnicodeBlock.GREEK,
1343            UnicodeBlock.CYRILLIC,
1344            UnicodeBlock.ARMENIAN,
1345            UnicodeBlock.HEBREW,
1346            UnicodeBlock.ARABIC,
1347            UnicodeBlock.SYRIAC,
1348            UnicodeBlock.THAANA,
1349            UnicodeBlock.DEVANAGARI,
1350            UnicodeBlock.BENGALI,
1351            UnicodeBlock.GURMUKHI,
1352            UnicodeBlock.GUJARATI,
1353            UnicodeBlock.ORIYA,
1354            UnicodeBlock.TAMIL,
1355            UnicodeBlock.TELUGU,
1356            UnicodeBlock.KANNADA,
1357            UnicodeBlock.MALAYALAM,
1358            UnicodeBlock.SINHALA,
1359            UnicodeBlock.THAI,
1360            UnicodeBlock.LAO,
1361            UnicodeBlock.TIBETAN,
1362            UnicodeBlock.MYANMAR,
1363            UnicodeBlock.GEORGIAN,
1364            UnicodeBlock.HANGUL_JAMO,
1365            UnicodeBlock.ETHIOPIC,
1366            UnicodeBlock.CHEROKEE,
1367            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1368            UnicodeBlock.OGHAM,
1369            UnicodeBlock.RUNIC,
1370            UnicodeBlock.KHMER,
1371            UnicodeBlock.MONGOLIAN,
1372            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1373            UnicodeBlock.GREEK_EXTENDED,
1374            UnicodeBlock.GENERAL_PUNCTUATION,
1375            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1376            UnicodeBlock.CURRENCY_SYMBOLS,
1377            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1378            UnicodeBlock.LETTERLIKE_SYMBOLS,
1379            UnicodeBlock.NUMBER_FORMS,
1380            UnicodeBlock.ARROWS,
1381            UnicodeBlock.MATHEMATICAL_OPERATORS,
1382            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1383            UnicodeBlock.CONTROL_PICTURES,
1384            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1385            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1386            UnicodeBlock.BOX_DRAWING,
1387            UnicodeBlock.BLOCK_ELEMENTS,
1388            UnicodeBlock.GEOMETRIC_SHAPES,
1389            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1390            UnicodeBlock.DINGBATS,
1391            UnicodeBlock.BRAILLE_PATTERNS,
1392            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1393            UnicodeBlock.KANGXI_RADICALS,
1394            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1395            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1396            UnicodeBlock.HIRAGANA,
1397            UnicodeBlock.KATAKANA,
1398            UnicodeBlock.BOPOMOFO,
1399            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1400            UnicodeBlock.KANBUN,
1401            UnicodeBlock.BOPOMOFO_EXTENDED,
1402            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1403            UnicodeBlock.CJK_COMPATIBILITY,
1404            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1405            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1406            UnicodeBlock.YI_SYLLABLES,
1407            UnicodeBlock.YI_RADICALS,
1408            UnicodeBlock.HANGUL_SYLLABLES,
1409            UnicodeBlock.HIGH_SURROGATES,
1410            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1411            UnicodeBlock.LOW_SURROGATES,
1412            UnicodeBlock.PRIVATE_USE_AREA,
1413            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1414            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1415            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1416            UnicodeBlock.COMBINING_HALF_MARKS,
1417            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1418            UnicodeBlock.SMALL_FORM_VARIANTS,
1419            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1420            UnicodeBlock.SPECIALS,
1421            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1422            UnicodeBlock.OLD_ITALIC,
1423            UnicodeBlock.GOTHIC,
1424            UnicodeBlock.DESERET,
1425            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1426            UnicodeBlock.MUSICAL_SYMBOLS,
1427            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1428            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1429            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1430            UnicodeBlock.TAGS,
1431            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1432            UnicodeBlock.TAGALOG,
1433            UnicodeBlock.HANUNOO,
1434            UnicodeBlock.BUHID,
1435            UnicodeBlock.TAGBANWA,
1436            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1437            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1438            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1439            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1440            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1441            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1442            UnicodeBlock.VARIATION_SELECTORS,
1443            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1444            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1445            UnicodeBlock.LIMBU,
1446            UnicodeBlock.TAI_LE,
1447            UnicodeBlock.KHMER_SYMBOLS,
1448            UnicodeBlock.PHONETIC_EXTENSIONS,
1449            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1450            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1451            UnicodeBlock.LINEAR_B_SYLLABARY,
1452            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1453            UnicodeBlock.AEGEAN_NUMBERS,
1454            UnicodeBlock.UGARITIC,
1455            UnicodeBlock.SHAVIAN,
1456            UnicodeBlock.OSMANYA,
1457            UnicodeBlock.CYPRIOT_SYLLABARY,
1458            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1459            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
1460        };
1461
1462        /**
1463         * Retrieves the constant that corresponds to the specified block name.
1464         * The block names are defined by the Unicode 4.0.1 specification in the
1465         * {@code Blocks-4.0.1.txt} file.
1466         * <p>
1467         * Block names may be one of the following:
1468         * <ul>
1469         * <li>Canonical block name, as defined by the Unicode specification;
1470         * case-insensitive.</li>
1471         * <li>Canonical block name without any spaces, as defined by the
1472         * Unicode specification; case-insensitive.</li>
1473         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1474         * converting the canonical name to uppercase and replacing all spaces and hyphens
1475         * with underscores.</li>
1476         * </ul>
1477         *
1478         * @param blockName
1479         *            the name of the block to retrieve.
1480         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1481         * @throws NullPointerException
1482         *             if {@code blockName} is {@code null}.
1483         * @throws IllegalArgumentException
1484         *             if {@code blockName} is not a valid block name.
1485         * @since 1.5
1486         */
1487        public static UnicodeBlock forName(String blockName) {
1488            if (blockName == null) {
1489                throw new NullPointerException("blockName == null");
1490            }
1491            int block = forNameImpl(blockName);
1492            if (block == -1) {
1493                if (blockName.equals("SURROGATES_AREA")) {
1494                    return SURROGATES_AREA;
1495                } else if(blockName.equalsIgnoreCase("greek")) {
1496                    return GREEK;
1497                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1498                        blockName.equals("Combining Marks for Symbols") ||
1499                        blockName.equals("CombiningMarksforSymbols")) {
1500                    return COMBINING_MARKS_FOR_SYMBOLS;
1501                }
1502                throw new IllegalArgumentException("Bad block name: " + blockName);
1503            }
1504            return BLOCKS[block];
1505        }
1506
1507        /**
1508         * Gets the constant for the Unicode block that contains the specified
1509         * character.
1510         *
1511         * @param c
1512         *            the character for which to get the {@code UnicodeBlock}
1513         *            constant.
1514         * @return the {@code UnicodeBlock} constant for the block that contains
1515         *         {@code c}, or {@code null} if {@code c} does not belong to
1516         *         any defined block.
1517         */
1518        public static UnicodeBlock of(char c) {
1519            return of((int) c);
1520        }
1521
1522        /**
1523         * Gets the constant for the Unicode block that contains the specified
1524         * Unicode code point.
1525         *
1526         * @param codePoint
1527         *            the Unicode code point for which to get the
1528         *            {@code UnicodeBlock} constant.
1529         * @return the {@code UnicodeBlock} constant for the block that contains
1530         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1531         *         not belong to any defined block.
1532         * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
1533         * @since 1.5
1534         */
1535        public static UnicodeBlock of(int codePoint) {
1536            checkValidCodePoint(codePoint);
1537            int block = ofImpl(codePoint);
1538            if (block == -1 || block >= BLOCKS.length) {
1539                return null;
1540            }
1541            return BLOCKS[block];
1542        }
1543
        /**
         * Creates a block constant with the given name. The constructor is
         * private, so instances can only be created from inside the enclosing
         * top-level class.
         *
         * @param blockName
         *            the name passed on to the superclass constructor.
         */
        private UnicodeBlock(String blockName) {
            super(blockName);
        }
1547    }
1548
    /**
     * Native lookup of a Unicode block by name, used by
     * {@link UnicodeBlock#forName(String)}.
     *
     * @param blockName
     *            the block name to resolve.
     * @return a block code; the caller treats {@code -1} as "name not
     *         recognized" and any other value as an index into the
     *         {@code UnicodeBlock} lookup table.
     */
    private static native int forNameImpl(String blockName);
1550
    /**
     * Native lookup of the Unicode block that contains a code point, used by
     * {@link UnicodeBlock#of(int)}.
     *
     * @param codePoint
     *            the code point to classify; the caller validates it first.
     * @return a block code; the caller maps {@code -1}, and any value beyond
     *         the end of the {@code UnicodeBlock} lookup table, to
     *         {@code null}, and uses every other value as an index into that
     *         table.
     */
    private static native int ofImpl(int codePoint);
1552
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     * @see #valueOf(char)
     */
    public Character(char value) {
        this.value = value;
    }
1563
1564    /**
1565     * Gets the primitive value of this character.
1566     *
1567     * @return this object's primitive value.
1568     */
1569    public char charValue() {
1570        return value;
1571    }
1572
1573    private static void checkValidCodePoint(int codePoint) {
1574        if (!isValidCodePoint(codePoint)) {
1575            throw new IllegalArgumentException("Invalid code point: " + codePoint);
1576        }
1577    }
1578
1579    /**
1580     * Compares this object to the specified character object to determine their
1581     * relative order.
1582     *
1583     * @param c
1584     *            the character object to compare this object to.
1585     * @return {@code 0} if the value of this character and the value of
1586     *         {@code c} are equal; a positive value if the value of this
1587     *         character is greater than the value of {@code c}; a negative
1588     *         value if the value of this character is less than the value of
1589     *         {@code c}.
1590     * @see java.lang.Comparable
1591     * @since 1.2
1592     */
1593    public int compareTo(Character c) {
1594        return compare(value, c.value);
1595    }
1596
1597    /**
1598     * Compares two {@code char} values.
1599     * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
1600     * @since 1.7
1601     */
1602    public static int compare(char lhs, char rhs) {
1603        return lhs - rhs;
1604    }
1605
1606    /**
1607     * Returns a {@code Character} instance for the {@code char} value passed.
1608     * <p>
1609     * If it is not necessary to get a new {@code Character} instance, it is
1610     * recommended to use this method instead of the constructor, since it
1611     * maintains a cache of instances which may result in better performance.
1612     *
1613     * @param c
1614     *            the char value for which to get a {@code Character} instance.
1615     * @return the {@code Character} instance for {@code c}.
1616     * @since 1.5
1617     */
1618    public static Character valueOf(char c) {
1619        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1620    }
1621
1622    /**
1623     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1624     */
1625    private static final Character[] SMALL_VALUES = new Character[128];
1626
1627    static {
1628        for (int i = 0; i < 128; i++) {
1629            SMALL_VALUES[i] = new Character((char) i);
1630        }
1631    }
1632    /**
1633     * Indicates whether {@code codePoint} is a valid Unicode code point.
1634     *
1635     * @param codePoint
1636     *            the code point to test.
1637     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1638     *         {@code false} otherwise.
1639     * @since 1.5
1640     */
1641    public static boolean isValidCodePoint(int codePoint) {
1642        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1643    }
1644
1645    /**
1646     * Indicates whether {@code codePoint} is within the supplementary code
1647     * point range.
1648     *
1649     * @param codePoint
1650     *            the code point to test.
1651     * @return {@code true} if {@code codePoint} is within the supplementary
1652     *         code point range; {@code false} otherwise.
1653     * @since 1.5
1654     */
1655    public static boolean isSupplementaryCodePoint(int codePoint) {
1656        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1657    }
1658
1659    /**
1660     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1661     * that is used for representing supplementary characters in UTF-16
1662     * encoding.
1663     *
1664     * @param ch
1665     *            the character to test.
1666     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1667     *         {@code false} otherwise.
1668     * @see #isLowSurrogate(char)
1669     * @since 1.5
1670     */
1671    public static boolean isHighSurrogate(char ch) {
1672        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1673    }
1674
1675    /**
1676     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1677     * that is used for representing supplementary characters in UTF-16
1678     * encoding.
1679     *
1680     * @param ch
1681     *            the character to test.
1682     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1683     *         {@code false} otherwise.
1684     * @see #isHighSurrogate(char)
1685     * @since 1.5
1686     */
1687    public static boolean isLowSurrogate(char ch) {
1688        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1689    }
1690
1691    /**
1692     * Returns true if the given character is a high or low surrogate.
1693     * @since 1.7
1694     */
1695    public static boolean isSurrogate(char ch) {
1696        return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
1697    }
1698
1699    /**
1700     * Indicates whether the specified character pair is a valid surrogate pair.
1701     *
1702     * @param high
1703     *            the high surrogate unit to test.
1704     * @param low
1705     *            the low surrogate unit to test.
1706     * @return {@code true} if {@code high} is a high-surrogate code unit and
1707     *         {@code low} is a low-surrogate code unit; {@code false}
1708     *         otherwise.
1709     * @see #isHighSurrogate(char)
1710     * @see #isLowSurrogate(char)
1711     * @since 1.5
1712     */
1713    public static boolean isSurrogatePair(char high, char low) {
1714        return (isHighSurrogate(high) && isLowSurrogate(low));
1715    }
1716
1717    /**
1718     * Calculates the number of {@code char} values required to represent the
1719     * specified Unicode code point. This method checks if the {@code codePoint}
1720     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1721     * returned, otherwise {@code 1}. To test if the code point is valid, use
1722     * the {@link #isValidCodePoint(int)} method.
1723     *
1724     * @param codePoint
1725     *            the code point for which to calculate the number of required
1726     *            chars.
1727     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1728     * @see #isValidCodePoint(int)
1729     * @see #isSupplementaryCodePoint(int)
1730     * @since 1.5
1731     */
1732    public static int charCount(int codePoint) {
1733        return (codePoint >= 0x10000 ? 2 : 1);
1734    }
1735
1736    /**
1737     * Converts a surrogate pair into a Unicode code point. This method assumes
1738     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1739     * surrogates, then the result is indeterminate. The
1740     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1741     * method to validate the pair.
1742     *
1743     * @param high
1744     *            the high surrogate unit.
1745     * @param low
1746     *            the low surrogate unit.
1747     * @return the Unicode code point corresponding to the surrogate unit pair.
1748     * @see #isSurrogatePair(char, char)
1749     * @since 1.5
1750     */
1751    public static int toCodePoint(char high, char low) {
1752        // See RFC 2781, Section 2.2
1753        // http://www.ietf.org/rfc/rfc2781.txt
1754        int h = (high & 0x3FF) << 10;
1755        int l = low & 0x3FF;
1756        return (h | l) + 0x10000;
1757    }
1758
1759    /**
1760     * Returns the code point at {@code index} in the specified sequence of
1761     * character units. If the unit at {@code index} is a high-surrogate unit,
1762     * {@code index + 1} is less than the length of the sequence and the unit at
1763     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1764     * point represented by the pair is returned; otherwise the {@code char}
1765     * value at {@code index} is returned.
1766     *
1767     * @param seq
1768     *            the source sequence of {@code char} units.
1769     * @param index
1770     *            the position in {@code seq} from which to retrieve the code
1771     *            point.
1772     * @return the Unicode code point or {@code char} value at {@code index} in
1773     *         {@code seq}.
1774     * @throws NullPointerException
1775     *             if {@code seq} is {@code null}.
1776     * @throws IndexOutOfBoundsException
1777     *             if the {@code index} is negative or greater than or equal to
1778     *             the length of {@code seq}.
1779     * @since 1.5
1780     */
1781    public static int codePointAt(CharSequence seq, int index) {
1782        if (seq == null) {
1783            throw new NullPointerException("seq == null");
1784        }
1785        int len = seq.length();
1786        if (index < 0 || index >= len) {
1787            throw new IndexOutOfBoundsException();
1788        }
1789
1790        char high = seq.charAt(index++);
1791        if (index >= len) {
1792            return high;
1793        }
1794        char low = seq.charAt(index);
1795        if (isSurrogatePair(high, low)) {
1796            return toCodePoint(high, low);
1797        }
1798        return high;
1799    }
1800
1801    /**
1802     * Returns the code point at {@code index} in the specified array of
1803     * character units. If the unit at {@code index} is a high-surrogate unit,
1804     * {@code index + 1} is less than the length of the array and the unit at
1805     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1806     * point represented by the pair is returned; otherwise the {@code char}
1807     * value at {@code index} is returned.
1808     *
1809     * @param seq
1810     *            the source array of {@code char} units.
1811     * @param index
1812     *            the position in {@code seq} from which to retrieve the code
1813     *            point.
1814     * @return the Unicode code point or {@code char} value at {@code index} in
1815     *         {@code seq}.
1816     * @throws NullPointerException
1817     *             if {@code seq} is {@code null}.
1818     * @throws IndexOutOfBoundsException
1819     *             if the {@code index} is negative or greater than or equal to
1820     *             the length of {@code seq}.
1821     * @since 1.5
1822     */
1823    public static int codePointAt(char[] seq, int index) {
1824        if (seq == null) {
1825            throw new NullPointerException("seq == null");
1826        }
1827        int len = seq.length;
1828        if (index < 0 || index >= len) {
1829            throw new IndexOutOfBoundsException();
1830        }
1831
1832        char high = seq[index++];
1833        if (index >= len) {
1834            return high;
1835        }
1836        char low = seq[index];
1837        if (isSurrogatePair(high, low)) {
1838            return toCodePoint(high, low);
1839        }
1840        return high;
1841    }
1842
1843    /**
1844     * Returns the code point at {@code index} in the specified array of
1845     * character units, where {@code index} has to be less than {@code limit}.
1846     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1847     * is less than {@code limit} and the unit at {@code index + 1} is a
1848     * low-surrogate unit, then the supplementary code point represented by the
1849     * pair is returned; otherwise the {@code char} value at {@code index} is
1850     * returned.
1851     *
1852     * @param seq
1853     *            the source array of {@code char} units.
1854     * @param index
1855     *            the position in {@code seq} from which to get the code point.
1856     * @param limit
1857     *            the index after the last unit in {@code seq} that can be used.
1858     * @return the Unicode code point or {@code char} value at {@code index} in
1859     *         {@code seq}.
1860     * @throws NullPointerException
1861     *             if {@code seq} is {@code null}.
1862     * @throws IndexOutOfBoundsException
1863     *             if {@code index < 0}, {@code index >= limit},
1864     *             {@code limit < 0} or if {@code limit} is greater than the
1865     *             length of {@code seq}.
1866     * @since 1.5
1867     */
1868    public static int codePointAt(char[] seq, int index, int limit) {
1869        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1870            throw new IndexOutOfBoundsException();
1871        }
1872
1873        char high = seq[index++];
1874        if (index >= limit) {
1875            return high;
1876        }
1877        char low = seq[index];
1878        if (isSurrogatePair(high, low)) {
1879            return toCodePoint(high, low);
1880        }
1881        return high;
1882    }
1883
1884    /**
1885     * Returns the code point that precedes {@code index} in the specified
1886     * sequence of character units. If the unit at {@code index - 1} is a
1887     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1888     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1889     * point represented by the pair is returned; otherwise the {@code char}
1890     * value at {@code index - 1} is returned.
1891     *
1892     * @param seq
1893     *            the source sequence of {@code char} units.
1894     * @param index
1895     *            the position in {@code seq} following the code
1896     *            point that should be returned.
1897     * @return the Unicode code point or {@code char} value before {@code index}
1898     *         in {@code seq}.
1899     * @throws NullPointerException
1900     *             if {@code seq} is {@code null}.
1901     * @throws IndexOutOfBoundsException
1902     *             if the {@code index} is less than 1 or greater than the
1903     *             length of {@code seq}.
1904     * @since 1.5
1905     */
1906    public static int codePointBefore(CharSequence seq, int index) {
1907        if (seq == null) {
1908            throw new NullPointerException("seq == null");
1909        }
1910        int len = seq.length();
1911        if (index < 1 || index > len) {
1912            throw new IndexOutOfBoundsException();
1913        }
1914
1915        char low = seq.charAt(--index);
1916        if (--index < 0) {
1917            return low;
1918        }
1919        char high = seq.charAt(index);
1920        if (isSurrogatePair(high, low)) {
1921            return toCodePoint(high, low);
1922        }
1923        return low;
1924    }
1925
1926    /**
1927     * Returns the code point that precedes {@code index} in the specified
1928     * array of character units. If the unit at {@code index - 1} is a
1929     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1930     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1931     * point represented by the pair is returned; otherwise the {@code char}
1932     * value at {@code index - 1} is returned.
1933     *
1934     * @param seq
1935     *            the source array of {@code char} units.
1936     * @param index
1937     *            the position in {@code seq} following the code
1938     *            point that should be returned.
1939     * @return the Unicode code point or {@code char} value before {@code index}
1940     *         in {@code seq}.
1941     * @throws NullPointerException
1942     *             if {@code seq} is {@code null}.
1943     * @throws IndexOutOfBoundsException
1944     *             if the {@code index} is less than 1 or greater than the
1945     *             length of {@code seq}.
1946     * @since 1.5
1947     */
1948    public static int codePointBefore(char[] seq, int index) {
1949        if (seq == null) {
1950            throw new NullPointerException("seq == null");
1951        }
1952        int len = seq.length;
1953        if (index < 1 || index > len) {
1954            throw new IndexOutOfBoundsException();
1955        }
1956
1957        char low = seq[--index];
1958        if (--index < 0) {
1959            return low;
1960        }
1961        char high = seq[index];
1962        if (isSurrogatePair(high, low)) {
1963            return toCodePoint(high, low);
1964        }
1965        return low;
1966    }
1967
1968    /**
1969     * Returns the code point that precedes the {@code index} in the specified
1970     * array of character units and is not less than {@code start}. If the unit
1971     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1972     * less than {@code start} and the unit at {@code index - 2} is a
1973     * high-surrogate unit, then the supplementary code point represented by the
1974     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1975     * is returned.
1976     *
1977     * @param seq
1978     *            the source array of {@code char} units.
1979     * @param index
1980     *            the position in {@code seq} following the code point that
1981     *            should be returned.
1982     * @param start
1983     *            the index of the first element in {@code seq}.
1984     * @return the Unicode code point or {@code char} value before {@code index}
1985     *         in {@code seq}.
1986     * @throws NullPointerException
1987     *             if {@code seq} is {@code null}.
1988     * @throws IndexOutOfBoundsException
1989     *             if the {@code index <= start}, {@code start < 0},
1990     *             {@code index} is greater than the length of {@code seq}, or
1991     *             if {@code start} is equal or greater than the length of
1992     *             {@code seq}.
1993     * @since 1.5
1994     */
1995    public static int codePointBefore(char[] seq, int index, int start) {
1996        if (seq == null) {
1997            throw new NullPointerException("seq == null");
1998        }
1999        int len = seq.length;
2000        if (index <= start || index > len || start < 0 || start >= len) {
2001            throw new IndexOutOfBoundsException();
2002        }
2003
2004        char low = seq[--index];
2005        if (--index < start) {
2006            return low;
2007        }
2008        char high = seq[index];
2009        if (isSurrogatePair(high, low)) {
2010            return toCodePoint(high, low);
2011        }
2012        return low;
2013    }
2014
2015    /**
2016     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2017     * and copies the value(s) into the char array {@code dst}, starting at
2018     * index {@code dstIndex}.
2019     *
2020     * @param codePoint
2021     *            the Unicode code point to encode.
2022     * @param dst
2023     *            the destination array to copy the encoded value into.
2024     * @param dstIndex
2025     *            the index in {@code dst} from where to start copying.
2026     * @return the number of {@code char} value units copied into {@code dst}.
2027     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2028     * @throws NullPointerException
2029     *             if {@code dst} is {@code null}.
2030     * @throws IndexOutOfBoundsException
2031     *             if {@code dstIndex} is negative, greater than or equal to
2032     *             {@code dst.length} or equals {@code dst.length - 1} when
2033     *             {@code codePoint} is a
2034     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2035     * @since 1.5
2036     */
2037    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2038        checkValidCodePoint(codePoint);
2039        if (dst == null) {
2040            throw new NullPointerException("dst == null");
2041        }
2042        if (dstIndex < 0 || dstIndex >= dst.length) {
2043            throw new IndexOutOfBoundsException();
2044        }
2045
2046        if (isSupplementaryCodePoint(codePoint)) {
2047            if (dstIndex == dst.length - 1) {
2048                throw new IndexOutOfBoundsException();
2049            }
2050            // See RFC 2781, Section 2.1
2051            // http://www.ietf.org/rfc/rfc2781.txt
2052            int cpPrime = codePoint - 0x10000;
2053            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2054            int low = 0xDC00 | (cpPrime & 0x3FF);
2055            dst[dstIndex] = (char) high;
2056            dst[dstIndex + 1] = (char) low;
2057            return 2;
2058        }
2059
2060        dst[dstIndex] = (char) codePoint;
2061        return 1;
2062    }
2063
2064    /**
2065     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2066     * and returns it as a char array.
2067     *
2068     * @param codePoint
2069     *            the Unicode code point to encode.
2070     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2071     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2072     *         then the returned array contains two characters, otherwise it
2073     *         contains just one character.
2074     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2075     * @since 1.5
2076     */
2077    public static char[] toChars(int codePoint) {
2078        checkValidCodePoint(codePoint);
2079        if (isSupplementaryCodePoint(codePoint)) {
2080            int cpPrime = codePoint - 0x10000;
2081            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2082            int low = 0xDC00 | (cpPrime & 0x3FF);
2083            return new char[] { (char) high, (char) low };
2084        }
2085        return new char[] { (char) codePoint };
2086    }
2087
2088    /**
2089     * Counts the number of Unicode code points in the subsequence of the
2090     * specified character sequence, as delineated by {@code beginIndex} and
2091     * {@code endIndex}. Any surrogate values with missing pair values will be
2092     * counted as one code point.
2093     *
2094     * @param seq
2095     *            the {@code CharSequence} to look through.
2096     * @param beginIndex
2097     *            the inclusive index to begin counting at.
2098     * @param endIndex
2099     *            the exclusive index to stop counting at.
2100     * @return the number of Unicode code points.
2101     * @throws NullPointerException
2102     *             if {@code seq} is {@code null}.
2103     * @throws IndexOutOfBoundsException
2104     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2105     *             if {@code endIndex} is greater than the length of {@code seq}.
2106     * @since 1.5
2107     */
2108    public static int codePointCount(CharSequence seq, int beginIndex,
2109            int endIndex) {
2110        if (seq == null) {
2111            throw new NullPointerException("seq == null");
2112        }
2113        int len = seq.length();
2114        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2115            throw new IndexOutOfBoundsException();
2116        }
2117
2118        int result = 0;
2119        for (int i = beginIndex; i < endIndex; i++) {
2120            char c = seq.charAt(i);
2121            if (isHighSurrogate(c)) {
2122                if (++i < endIndex) {
2123                    c = seq.charAt(i);
2124                    if (!isLowSurrogate(c)) {
2125                        result++;
2126                    }
2127                }
2128            }
2129            result++;
2130        }
2131        return result;
2132    }
2133
2134    /**
2135     * Counts the number of Unicode code points in the subsequence of the
2136     * specified char array, as delineated by {@code offset} and {@code count}.
2137     * Any surrogate values with missing pair values will be counted as one code
2138     * point.
2139     *
2140     * @param seq
2141     *            the char array to look through
2142     * @param offset
2143     *            the inclusive index to begin counting at.
2144     * @param count
2145     *            the number of {@code char} values to look through in
2146     *            {@code seq}.
2147     * @return the number of Unicode code points.
2148     * @throws NullPointerException
2149     *             if {@code seq} is {@code null}.
2150     * @throws IndexOutOfBoundsException
2151     *             if {@code offset < 0}, {@code count < 0} or if
2152     *             {@code offset + count} is greater than the length of
2153     *             {@code seq}.
2154     * @since 1.5
2155     */
2156    public static int codePointCount(char[] seq, int offset, int count) {
2157        Arrays.checkOffsetAndCount(seq.length, offset, count);
2158        int endIndex = offset + count;
2159        int result = 0;
2160        for (int i = offset; i < endIndex; i++) {
2161            char c = seq[i];
2162            if (isHighSurrogate(c)) {
2163                if (++i < endIndex) {
2164                    c = seq[i];
2165                    if (!isLowSurrogate(c)) {
2166                        result++;
2167                    }
2168                }
2169            }
2170            result++;
2171        }
2172        return result;
2173    }
2174
2175    /**
2176     * Determines the index in the specified character sequence that is offset
2177     * {@code codePointOffset} code points from {@code index}.
2178     *
2179     * @param seq
2180     *            the character sequence to find the index in.
2181     * @param index
2182     *            the start index in {@code seq}.
2183     * @param codePointOffset
2184     *            the number of code points to look backwards or forwards; may
2185     *            be a negative or positive value.
2186     * @return the index in {@code seq} that is {@code codePointOffset} code
2187     *         points away from {@code index}.
2188     * @throws NullPointerException
2189     *             if {@code seq} is {@code null}.
2190     * @throws IndexOutOfBoundsException
2191     *             if {@code index < 0}, {@code index} is greater than the
2192     *             length of {@code seq}, or if there are not enough values in
2193     *             {@code seq} to skip {@code codePointOffset} code points
2194     *             forwards or backwards (if {@code codePointOffset} is
2195     *             negative) from {@code index}.
2196     * @since 1.5
2197     */
2198    public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2199        if (seq == null) {
2200            throw new NullPointerException("seq == null");
2201        }
2202        int len = seq.length();
2203        if (index < 0 || index > len) {
2204            throw new IndexOutOfBoundsException();
2205        }
2206
2207        if (codePointOffset == 0) {
2208            return index;
2209        }
2210
2211        if (codePointOffset > 0) {
2212            int codePoints = codePointOffset;
2213            int i = index;
2214            while (codePoints > 0) {
2215                codePoints--;
2216                if (i >= len) {
2217                    throw new IndexOutOfBoundsException();
2218                }
2219                if (isHighSurrogate(seq.charAt(i))) {
2220                    int next = i + 1;
2221                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2222                        i++;
2223                    }
2224                }
2225                i++;
2226            }
2227            return i;
2228        }
2229
2230        int codePoints = -codePointOffset;
2231        int i = index;
2232        while (codePoints > 0) {
2233            codePoints--;
2234            i--;
2235            if (i < 0) {
2236                throw new IndexOutOfBoundsException();
2237            }
2238            if (isLowSurrogate(seq.charAt(i))) {
2239                int prev = i - 1;
2240                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2241                    i--;
2242                }
2243            }
2244        }
2245        return i;
2246    }
2247
2248    /**
2249     * Determines the index in a subsequence of the specified character array
2250     * that is offset {@code codePointOffset} code points from {@code index}.
2251     * The subsequence is delineated by {@code start} and {@code count}.
2252     *
2253     * @param seq
2254     *            the character array to find the index in.
2255     * @param start
2256     *            the inclusive index that marks the beginning of the
2257     *            subsequence.
2258     * @param count
2259     *            the number of {@code char} values to include within the
2260     *            subsequence.
2261     * @param index
2262     *            the start index in the subsequence of the char array.
2263     * @param codePointOffset
2264     *            the number of code points to look backwards or forwards; may
2265     *            be a negative or positive value.
2266     * @return the index in {@code seq} that is {@code codePointOffset} code
2267     *         points away from {@code index}.
2268     * @throws NullPointerException
2269     *             if {@code seq} is {@code null}.
2270     * @throws IndexOutOfBoundsException
2271     *             if {@code start < 0}, {@code count < 0},
2272     *             {@code index < start}, {@code index > start + count},
2273     *             {@code start + count} is greater than the length of
2274     *             {@code seq}, or if there are not enough values in
2275     *             {@code seq} to skip {@code codePointOffset} code points
2276     *             forward or backward (if {@code codePointOffset} is
2277     *             negative) from {@code index}.
2278     * @since 1.5
2279     */
2280    public static int offsetByCodePoints(char[] seq, int start, int count,
2281            int index, int codePointOffset) {
2282        Arrays.checkOffsetAndCount(seq.length, start, count);
2283        int end = start + count;
2284        if (index < start || index > end) {
2285            throw new IndexOutOfBoundsException();
2286        }
2287
2288        if (codePointOffset == 0) {
2289            return index;
2290        }
2291
2292        if (codePointOffset > 0) {
2293            int codePoints = codePointOffset;
2294            int i = index;
2295            while (codePoints > 0) {
2296                codePoints--;
2297                if (i >= end) {
2298                    throw new IndexOutOfBoundsException();
2299                }
2300                if (isHighSurrogate(seq[i])) {
2301                    int next = i + 1;
2302                    if (next < end && isLowSurrogate(seq[next])) {
2303                        i++;
2304                    }
2305                }
2306                i++;
2307            }
2308            return i;
2309        }
2310
2311        int codePoints = -codePointOffset;
2312        int i = index;
2313        while (codePoints > 0) {
2314            codePoints--;
2315            i--;
2316            if (i < start) {
2317                throw new IndexOutOfBoundsException();
2318            }
2319            if (isLowSurrogate(seq[i])) {
2320                int prev = i - 1;
2321                if (prev >= start && isHighSurrogate(seq[prev])) {
2322                    i--;
2323                }
2324            }
2325        }
2326        return i;
2327    }
2328
2329    /**
2330     * Convenience method to determine the value of the specified character
2331     * {@code c} in the supplied radix. The value of {@code radix} must be
2332     * between MIN_RADIX and MAX_RADIX.
2333     *
2334     * @param c
2335     *            the character to determine the value of.
2336     * @param radix
2337     *            the radix.
2338     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2339     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2340     */
2341    public static int digit(char c, int radix) {
2342        return digit((int) c, radix);
2343    }
2344
2345    /**
2346     * Convenience method to determine the value of the character
2347     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2348     * be between MIN_RADIX and MAX_RADIX.
2349     *
2350     * @param codePoint
2351     *            the character, including supplementary characters.
2352     * @param radix
2353     *            the radix.
2354     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2355     *         {@link #MAX_RADIX} then the value of the character in the radix;
2356     *         -1 otherwise.
2357     */
2358    public static int digit(int codePoint, int radix) {
2359        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2360            return -1;
2361        }
2362        if (codePoint < 128) {
2363            // Optimized for ASCII
2364            int result = -1;
2365            if ('0' <= codePoint && codePoint <= '9') {
2366                result = codePoint - '0';
2367            } else if ('a' <= codePoint && codePoint <= 'z') {
2368                result = 10 + (codePoint - 'a');
2369            } else if ('A' <= codePoint && codePoint <= 'Z') {
2370                result = 10 + (codePoint - 'A');
2371            }
2372            return result < radix ? result : -1;
2373        }
2374        return digitImpl(codePoint, radix);
2375    }
2376
    // Native digit lookup. digit(int, int) calls this only for code points >= 128,
    // after it has already rejected radix values outside [MIN_RADIX, MAX_RADIX].
    private static native int digitImpl(int codePoint, int radix);
2378
2379    /**
2380     * Compares this object with the specified object and indicates if they are
2381     * equal. In order to be equal, {@code object} must be an instance of
2382     * {@code Character} and have the same char value as this object.
2383     *
2384     * @param object
     *            the object to compare this character with.
2386     * @return {@code true} if the specified object is equal to this
2387     *         {@code Character}; {@code false} otherwise.
2388     */
2389    @Override
2390    public boolean equals(Object object) {
2391        return (object instanceof Character) && (((Character) object).value == value);
2392    }
2393
2394    /**
2395     * Returns the character which represents the specified digit in the
2396     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2397     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2398     * smaller than {@code radix}. If any of these conditions does not hold, 0
2399     * is returned.
2400     *
2401     * @param digit
2402     *            the integer value.
2403     * @param radix
2404     *            the radix.
2405     * @return the character which represents the {@code digit} in the
2406     *         {@code radix}.
2407     */
2408    public static char forDigit(int digit, int radix) {
2409        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2410            if (digit >= 0 && digit < radix) {
2411                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2412            }
2413        }
2414        return 0;
2415    }
2416
2417    /**
2418     * Returns a human-readable name for the given code point,
2419     * or null if the code point is unassigned.
2420     *
2421     * <p>As a fallback mechanism this method returns strings consisting of the Unicode
2422     * block name (with underscores replaced by spaces), a single space, and the uppercase
2423     * hex value of the code point, using as few digits as necessary.
2424     *
2425     * <p>Examples:
2426     * <ul>
2427     * <li>{@code Character.getName(0)} returns "NULL".
2428     * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
2429     * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
2430     * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
2431     * </ul>
2432     *
2433     * <p>Note that the exact strings returned will vary from release to release.
2434     *
2435     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2436     * @since 1.7
2437     */
2438    public static String getName(int codePoint) {
2439        checkValidCodePoint(codePoint);
2440        if (getType(codePoint) == Character.UNASSIGNED) {
2441            return null;
2442        }
2443        String result = getNameImpl(codePoint);
2444        if (result == null) {
2445            String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
2446            result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
2447        }
2448        return result;
2449    }
2450
2451    private static native String getNameImpl(int codePoint);
2452
2453    /**
2454     * Returns the numeric value of the specified Unicode character.
2455     * See {@link #getNumericValue(int)}.
2456     *
2457     * @param c the character
2458     * @return a non-negative numeric integer value if a numeric value for
2459     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2460     *         -2 if the numeric value can not be represented as an integer.
2461     */
2462    public static int getNumericValue(char c) {
2463        return getNumericValue((int) c);
2464    }
2465
2466    /**
2467     * Gets the numeric value of the specified Unicode code point. For example,
2468     * the code point '\u216B' stands for the Roman number XII, which has the
2469     * numeric value 12.
2470     *
2471     * <p>There are two points of divergence between this method and the Unicode
2472     * specification. This method treats the letters a-z (in both upper and lower
2473     * cases, and their full-width variants) as numbers from 10 to 35. The
2474     * Unicode specification also supports the idea of code points with non-integer
2475     * numeric values; this method does not (except to the extent of returning -2
2476     * for such code points).
2477     *
2478     * @param codePoint the code point
2479     * @return a non-negative numeric integer value if a numeric value for
2480     *         {@code codePoint} exists, -1 if there is no numeric value for
2481     *         {@code codePoint}, -2 if the numeric value can not be
2482     *         represented with an integer.
2483     */
2484    public static int getNumericValue(int codePoint) {
2485        // This is both an optimization and papers over differences between Java and ICU.
2486        if (codePoint < 128) {
2487            if (codePoint >= '0' && codePoint <= '9') {
2488                return codePoint - '0';
2489            }
2490            if (codePoint >= 'a' && codePoint <= 'z') {
2491                return codePoint - ('a' - 10);
2492            }
2493            if (codePoint >= 'A' && codePoint <= 'Z') {
2494                return codePoint - ('A' - 10);
2495            }
2496            return -1;
2497        }
2498        // Full-width uppercase A-Z.
2499        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2500            return codePoint - 0xff17;
2501        }
2502        // Full-width lowercase a-z.
2503        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2504            return codePoint - 0xff37;
2505        }
2506        return getNumericValueImpl(codePoint);
2507    }
2508
2509    private static native int getNumericValueImpl(int codePoint);
2510
2511    /**
2512     * Gets the general Unicode category of the specified character.
2513     *
2514     * @param c
2515     *            the character to get the category of.
2516     * @return the Unicode category of {@code c}.
2517     */
2518    public static int getType(char c) {
2519        return getType((int) c);
2520    }
2521
2522    /**
2523     * Gets the general Unicode category of the specified code point.
2524     *
2525     * @param codePoint
2526     *            the Unicode code point to get the category of.
2527     * @return the Unicode category of {@code codePoint}.
2528     */
2529    public static int getType(int codePoint) {
2530        int type = getTypeImpl(codePoint);
2531        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2532        if (type <= Character.FORMAT) {
2533            return type;
2534        }
2535        return (type + 1);
2536    }
2537
2538    private static native int getTypeImpl(int codePoint);
2539
2540    /**
2541     * Gets the Unicode directionality of the specified character.
2542     *
2543     * @param c
2544     *            the character to get the directionality of.
2545     * @return the Unicode directionality of {@code c}.
2546     */
2547    public static byte getDirectionality(char c) {
2548        return getDirectionality((int)c);
2549    }
2550
2551    /**
     * Gets the Unicode directionality of the specified code point.
2553     *
2554     * @param codePoint
2555     *            the Unicode code point to get the directionality of.
2556     * @return the Unicode directionality of {@code codePoint}.
2557     */
2558    public static byte getDirectionality(int codePoint) {
2559        if (getType(codePoint) == Character.UNASSIGNED) {
2560            return Character.DIRECTIONALITY_UNDEFINED;
2561        }
2562
2563        byte directionality = getDirectionalityImpl(codePoint);
2564        if (directionality == -1) {
2565            return -1;
2566        }
2567        return DIRECTIONALITY[directionality];
2568    }
2569
2570    private static native byte getDirectionalityImpl(int codePoint);
2571
2572    /**
2573     * Indicates whether the specified character is mirrored.
2574     *
2575     * @param c
2576     *            the character to check.
2577     * @return {@code true} if {@code c} is mirrored; {@code false}
2578     *         otherwise.
2579     */
2580    public static boolean isMirrored(char c) {
2581        return isMirrored((int) c);
2582    }
2583
2584    /**
2585     * Indicates whether the specified code point is mirrored.
2586     *
2587     * @param codePoint
2588     *            the code point to check.
2589     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2590     *         otherwise.
2591     */
2592    public static boolean isMirrored(int codePoint) {
2593        return isMirroredImpl(codePoint);
2594    }
2595
2596    private static native boolean isMirroredImpl(int codePoint);
2597
2598    @Override
2599    public int hashCode() {
2600        return value;
2601    }
2602
2603    /**
2604     * Returns the high surrogate for the given code point. The result is meaningless if
2605     * the given code point is not a supplementary character.
2606     * @since 1.7
2607     */
2608    public static char highSurrogate(int codePoint) {
2609        return (char) ((codePoint >> 10) + 0xd7c0);
2610    }
2611
2612    /**
2613     * Returns the low surrogate for the given code point. The result is meaningless if
2614     * the given code point is not a supplementary character.
2615     * @since 1.7
2616     */
2617    public static char lowSurrogate(int codePoint) {
2618        return (char) ((codePoint & 0x3ff) | 0xdc00);
2619    }
2620
2621    /**
2622     * Returns true if the given code point is in the Basic Multilingual Plane (BMP).
2623     * Such code points can be represented by a single {@code char}.
2624     * @since 1.7
2625     */
2626    public static boolean isBmpCodePoint(int codePoint) {
2627       return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE;
2628    }
2629
2630    /**
2631     * Indicates whether the specified character is defined in the Unicode
2632     * specification.
2633     *
2634     * @param c
2635     *            the character to check.
2636     * @return {@code true} if the general Unicode category of the character is
2637     *         not {@code UNASSIGNED}; {@code false} otherwise.
2638     */
2639    public static boolean isDefined(char c) {
2640        return isDefinedImpl(c);
2641    }
2642
2643    /**
2644     * Indicates whether the specified code point is defined in the Unicode
2645     * specification.
2646     *
2647     * @param codePoint
2648     *            the code point to check.
2649     * @return {@code true} if the general Unicode category of the code point is
2650     *         not {@code UNASSIGNED}; {@code false} otherwise.
2651     */
2652    public static boolean isDefined(int codePoint) {
2653        return isDefinedImpl(codePoint);
2654    }
2655
2656    private static native boolean isDefinedImpl(int codePoint);
2657
2658    /**
2659     * Indicates whether the specified character is a digit.
2660     *
2661     * @param c
2662     *            the character to check.
2663     * @return {@code true} if {@code c} is a digit; {@code false}
2664     *         otherwise.
2665     */
2666    public static boolean isDigit(char c) {
2667        return isDigit((int) c);
2668    }
2669
2670    /**
2671     * Indicates whether the specified code point is a digit.
2672     *
2673     * @param codePoint
2674     *            the code point to check.
2675     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2676     *         otherwise.
2677     */
2678    public static boolean isDigit(int codePoint) {
2679        // Optimized case for ASCII
2680        if ('0' <= codePoint && codePoint <= '9') {
2681            return true;
2682        }
2683        if (codePoint < 1632) {
2684            return false;
2685        }
2686        return isDigitImpl(codePoint);
2687    }
2688
2689    private static native boolean isDigitImpl(int codePoint);
2690
2691    /**
2692     * Indicates whether the specified character is ignorable in a Java or
2693     * Unicode identifier.
2694     *
2695     * @param c
2696     *            the character to check.
2697     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2698     */
2699    public static boolean isIdentifierIgnorable(char c) {
2700        return isIdentifierIgnorable((int) c);
2701    }
2702
2703    /**
2704     * Indicates whether the specified code point is ignorable in a Java or
2705     * Unicode identifier.
2706     *
2707     * @param codePoint
2708     *            the code point to check.
2709     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2710     *         otherwise.
2711     */
2712    public static boolean isIdentifierIgnorable(int codePoint) {
2713        // This is both an optimization and papers over differences between Java and ICU.
2714        if (codePoint < 0x600) {
2715            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2716                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2717        }
2718        return isIdentifierIgnorableImpl(codePoint);
2719    }
2720
2721    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2722
2723    /**
2724     * Indicates whether the specified character is an ISO control character.
2725     *
2726     * @param c
2727     *            the character to check.
2728     * @return {@code true} if {@code c} is an ISO control character;
2729     *         {@code false} otherwise.
2730     */
2731    public static boolean isISOControl(char c) {
2732        return isISOControl((int) c);
2733    }
2734
2735    /**
2736     * Indicates whether the specified code point is an ISO control character.
2737     *
2738     * @param c
2739     *            the code point to check.
2740     * @return {@code true} if {@code c} is an ISO control character;
2741     *         {@code false} otherwise.
2742     */
2743    public static boolean isISOControl(int c) {
2744        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2745    }
2746
2747    /**
2748     * Indicates whether the specified character is a valid part of a Java
2749     * identifier other than the first character.
2750     *
2751     * @param c
2752     *            the character to check.
2753     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2754     *         {@code false} otherwise.
2755     */
2756    public static boolean isJavaIdentifierPart(char c) {
2757        return isJavaIdentifierPart((int) c);
2758    }
2759
2760    /**
2761     * Indicates whether the specified code point is a valid part of a Java
2762     * identifier other than the first character.
2763     *
2764     * @param codePoint
2765     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Java
     *         identifier; {@code false} otherwise.
2768     */
2769    public static boolean isJavaIdentifierPart(int codePoint) {
2770        // Use precomputed bitmasks to optimize the ASCII range.
2771        if (codePoint < 64) {
2772            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2773        } else if (codePoint < 128) {
2774            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2775        }
2776        int type = getType(codePoint);
2777        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2778                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2779                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2780                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2781                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2782                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2783    }
2784
2785    /**
2786     * Indicates whether the specified character is a valid first character for
2787     * a Java identifier.
2788     *
2789     * @param c
2790     *            the character to check.
2791     * @return {@code true} if {@code c} is a valid first character of a Java
2792     *         identifier; {@code false} otherwise.
2793     */
2794    public static boolean isJavaIdentifierStart(char c) {
2795        return isJavaIdentifierStart((int) c);
2796    }
2797
2798    /**
2799     * Indicates whether the specified code point is a valid first character for
2800     * a Java identifier.
2801     *
2802     * @param codePoint
2803     *            the code point to check.
2804     * @return {@code true} if {@code codePoint} is a valid start of a Java
2805     *         identifier; {@code false} otherwise.
2806     */
2807    public static boolean isJavaIdentifierStart(int codePoint) {
2808        // Use precomputed bitmasks to optimize the ASCII range.
2809        if (codePoint < 64) {
2810            return (codePoint == '$'); // There's only one character in this range.
2811        } else if (codePoint < 128) {
2812            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2813        }
2814        int type = getType(codePoint);
2815        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2816                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2817    }
2818
2819    /**
2820     * Indicates whether the specified character is a Java letter.
2821     *
2822     * @param c
2823     *            the character to check.
2824     * @return {@code true} if {@code c} is a Java letter; {@code false}
2825     *         otherwise.
2826     * @deprecated Use {@link #isJavaIdentifierStart(char)} instead.
2827     */
2828    @Deprecated
2829    public static boolean isJavaLetter(char c) {
2830        return isJavaIdentifierStart(c);
2831    }
2832
2833    /**
2834     * Indicates whether the specified character is a Java letter or digit
2835     * character.
2836     *
2837     * @param c
2838     *            the character to check.
2839     * @return {@code true} if {@code c} is a Java letter or digit;
2840     *         {@code false} otherwise.
2841     * @deprecated Use {@link #isJavaIdentifierPart(char)} instead.
2842     */
2843    @Deprecated
2844    public static boolean isJavaLetterOrDigit(char c) {
2845        return isJavaIdentifierPart(c);
2846    }
2847
2848    /**
2849     * Indicates whether the specified character is a letter.
2850     *
2851     * @param c
2852     *            the character to check.
2853     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2854     */
2855    public static boolean isLetter(char c) {
2856        return isLetter((int) c);
2857    }
2858
2859    /**
2860     * Indicates whether the specified code point is a letter.
2861     *
2862     * @param codePoint
2863     *            the code point to check.
2864     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2865     *         otherwise.
2866     */
2867    public static boolean isLetter(int codePoint) {
2868        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2869            return true;
2870        }
2871        if (codePoint < 128) {
2872            return false;
2873        }
2874        return isLetterImpl(codePoint);
2875    }
2876
2877    private static native boolean isLetterImpl(int codePoint);
2878
2879    /**
2880     * Indicates whether the specified character is a letter or a digit.
2881     *
2882     * @param c
2883     *            the character to check.
2884     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2885     *         otherwise.
2886     */
2887    public static boolean isLetterOrDigit(char c) {
2888        return isLetterOrDigit((int) c);
2889    }
2890
2891    /**
2892     * Indicates whether the specified code point is a letter or a digit.
2893     *
2894     * @param codePoint
2895     *            the code point to check.
2896     * @return {@code true} if {@code codePoint} is a letter or a digit;
2897     *         {@code false} otherwise.
2898     */
2899    public static boolean isLetterOrDigit(int codePoint) {
2900        // Optimized case for ASCII
2901        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2902            return true;
2903        }
2904        if ('0' <= codePoint && codePoint <= '9') {
2905            return true;
2906        }
2907        if (codePoint < 128) {
2908            return false;
2909        }
2910        return isLetterOrDigitImpl(codePoint);
2911    }
2912
2913    private static native boolean isLetterOrDigitImpl(int codePoint);
2914
2915    /**
2916     * Indicates whether the specified character is a lower case letter.
2917     *
2918     * @param c
2919     *            the character to check.
2920     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2921     *         otherwise.
2922     */
2923    public static boolean isLowerCase(char c) {
2924        return isLowerCase((int) c);
2925    }
2926
2927    /**
2928     * Indicates whether the specified code point is a lower case letter.
2929     *
2930     * @param codePoint
2931     *            the code point to check.
2932     * @return {@code true} if {@code codePoint} is a lower case letter;
2933     *         {@code false} otherwise.
2934     */
2935    public static boolean isLowerCase(int codePoint) {
2936        // Optimized case for ASCII
2937        if ('a' <= codePoint && codePoint <= 'z') {
2938            return true;
2939        }
2940        if (codePoint < 128) {
2941            return false;
2942        }
2943        return isLowerCaseImpl(codePoint);
2944    }
2945
2946    private static native boolean isLowerCaseImpl(int codePoint);
2947
2948    /**
2949     * Indicates whether the specified character is a Java space.
2950     *
2951     * @param c
2952     *            the character to check.
2953     * @return {@code true} if {@code c} is a Java space; {@code false}
2954     *         otherwise.
2955     * @deprecated Use {@link #isWhitespace(char)} instead.
2956     */
2957    @Deprecated
2958    public static boolean isSpace(char c) {
2959        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2960    }
2961
2962    /**
2963     * Indicates whether the specified character is a Unicode space character.
2964     * That is, if it is a member of one of the Unicode categories Space
2965     * Separator, Line Separator, or Paragraph Separator.
2966     *
2967     * @param c
2968     *            the character to check.
2969     * @return {@code true} if {@code c} is a Unicode space character,
2970     *         {@code false} otherwise.
2971     */
2972    public static boolean isSpaceChar(char c) {
2973        return isSpaceChar((int) c);
2974    }
2975
2976    /**
2977     * Indicates whether the specified code point is a Unicode space character.
2978     * That is, if it is a member of one of the Unicode categories Space
2979     * Separator, Line Separator, or Paragraph Separator.
2980     *
2981     * @param codePoint
2982     *            the code point to check.
2983     * @return {@code true} if {@code codePoint} is a Unicode space character,
2984     *         {@code false} otherwise.
2985     */
2986    public static boolean isSpaceChar(int codePoint) {
2987        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
2988            return true;
2989        }
2990        if (codePoint < 0x2000) {
2991            return false;
2992        }
2993        if (codePoint <= 0xffff) {
2994            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
2995                    codePoint == 0x202f || codePoint == 0x3000;
2996        }
2997        return isSpaceCharImpl(codePoint);
2998    }
2999
3000    private static native boolean isSpaceCharImpl(int codePoint);
3001
3002    /**
3003     * Indicates whether the specified character is a titlecase character.
3004     *
3005     * @param c
3006     *            the character to check.
3007     * @return {@code true} if {@code c} is a titlecase character, {@code false}
3008     *         otherwise.
3009     */
3010    public static boolean isTitleCase(char c) {
3011        return isTitleCaseImpl(c);
3012    }
3013
3014    /**
3015     * Indicates whether the specified code point is a titlecase character.
3016     *
3017     * @param codePoint
3018     *            the code point to check.
3019     * @return {@code true} if {@code codePoint} is a titlecase character,
3020     *         {@code false} otherwise.
3021     */
3022    public static boolean isTitleCase(int codePoint) {
3023        return isTitleCaseImpl(codePoint);
3024    }
3025
3026    private static native boolean isTitleCaseImpl(int codePoint);
3027
3028    /**
3029     * Indicates whether the specified character is valid as part of a Unicode
3030     * identifier other than the first character.
3031     *
3032     * @param c
3033     *            the character to check.
3034     * @return {@code true} if {@code c} is valid as part of a Unicode
3035     *         identifier; {@code false} otherwise.
3036     */
3037    public static boolean isUnicodeIdentifierPart(char c) {
3038        return isUnicodeIdentifierPartImpl(c);
3039    }
3040
3041    /**
3042     * Indicates whether the specified code point is valid as part of a Unicode
3043     * identifier other than the first character.
3044     *
3045     * @param codePoint
3046     *            the code point to check.
3047     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3048     *         identifier; {@code false} otherwise.
3049     */
3050    public static boolean isUnicodeIdentifierPart(int codePoint) {
3051        return isUnicodeIdentifierPartImpl(codePoint);
3052    }
3053
3054    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3055
3056    /**
3057     * Indicates whether the specified character is a valid initial character
3058     * for a Unicode identifier.
3059     *
3060     * @param c
3061     *            the character to check.
3062     * @return {@code true} if {@code c} is a valid first character for a
3063     *         Unicode identifier; {@code false} otherwise.
3064     */
3065    public static boolean isUnicodeIdentifierStart(char c) {
3066        return isUnicodeIdentifierStartImpl(c);
3067    }
3068
3069    /**
3070     * Indicates whether the specified code point is a valid initial character
3071     * for a Unicode identifier.
3072     *
3073     * @param codePoint
3074     *            the code point to check.
3075     * @return {@code true} if {@code codePoint} is a valid first character for
3076     *         a Unicode identifier; {@code false} otherwise.
3077     */
3078    public static boolean isUnicodeIdentifierStart(int codePoint) {
3079        return isUnicodeIdentifierStartImpl(codePoint);
3080    }
3081
3082    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3083
3084    /**
3085     * Indicates whether the specified character is an upper case letter.
3086     *
3087     * @param c
3088     *            the character to check.
     * @return {@code true} if {@code c} is an upper case letter; {@code false}
     *         otherwise.
3091     */
3092    public static boolean isUpperCase(char c) {
3093        return isUpperCase((int) c);
3094    }
3095
3096    /**
3097     * Indicates whether the specified code point is an upper case letter.
3098     *
3099     * @param codePoint
3100     *            the code point to check.
     * @return {@code true} if {@code codePoint} is an upper case letter;
     *         {@code false} otherwise.
3103     */
3104    public static boolean isUpperCase(int codePoint) {
3105        // Optimized case for ASCII
3106        if ('A' <= codePoint && codePoint <= 'Z') {
3107            return true;
3108        }
3109        if (codePoint < 128) {
3110            return false;
3111        }
3112        return isUpperCaseImpl(codePoint);
3113    }
3114
3115    private static native boolean isUpperCaseImpl(int codePoint);
3116
3117    /**
3118     * Indicates whether the specified character is a whitespace character in
3119     * Java.
3120     *
3121     * @param c
3122     *            the character to check.
3123     * @return {@code true} if the supplied {@code c} is a whitespace character
3124     *         in Java; {@code false} otherwise.
3125     */
3126    public static boolean isWhitespace(char c) {
3127        return isWhitespace((int) c);
3128    }
3129
3130    /**
3131     * Indicates whether the specified code point is a whitespace character in
3132     * Java.
3133     *
3134     * @param codePoint
3135     *            the code point to check.
     * @return {@code true} if the supplied {@code codePoint} is a whitespace
     *         character in Java; {@code false} otherwise.
3138     */
3139    public static boolean isWhitespace(int codePoint) {
3140        // This is both an optimization and papers over differences between Java and ICU.
3141        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3142            return true;
3143        }
3144        if (codePoint == 0x1680) {
3145            return true;
3146        }
3147        if (codePoint < 0x2000 || codePoint == 0x2007) {
3148            return false;
3149        }
3150        if (codePoint <= 0xffff) {
3151            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3152                    codePoint == 0x3000;
3153        }
3154        return isWhitespaceImpl(codePoint);
3155    }
3156
3157    private static native boolean isWhitespaceImpl(int codePoint);
3158
3159    /**
3160     * Reverses the order of the first and second byte in the specified
3161     * character.
3162     *
3163     * @param c
3164     *            the character to reverse.
3165     * @return the character with reordered bytes.
3166     */
3167    public static char reverseBytes(char c) {
3168        return (char)((c<<8) | (c>>8));
3169    }
3170
3171    /**
3172     * Returns the lower case equivalent for the specified character if the
3173     * character is an upper case letter. Otherwise, the specified character is
3174     * returned unchanged.
3175     *
3176     * @param c
3177     *            the character
3178     * @return if {@code c} is an upper case character then its lower case
3179     *         counterpart, otherwise just {@code c}.
3180     */
3181    public static char toLowerCase(char c) {
3182        return (char) toLowerCase((int) c);
3183    }
3184
3185    /**
3186     * Returns the lower case equivalent for the specified code point if it is
3187     * an upper case letter. Otherwise, the specified code point is returned
3188     * unchanged.
3189     *
3190     * @param codePoint
3191     *            the code point to check.
3192     * @return if {@code codePoint} is an upper case character then its lower
3193     *         case counterpart, otherwise just {@code codePoint}.
3194     */
3195    public static int toLowerCase(int codePoint) {
3196        // Optimized case for ASCII
3197        if ('A' <= codePoint && codePoint <= 'Z') {
3198            return (char) (codePoint + ('a' - 'A'));
3199        }
3200        if (codePoint < 192) {
3201            return codePoint;
3202        }
3203        return toLowerCaseImpl(codePoint);
3204    }
3205
3206    private static native int toLowerCaseImpl(int codePoint);
3207
3208    @Override
3209    public String toString() {
3210        return String.valueOf(value);
3211    }
3212
3213    /**
3214     * Converts the specified character to its string representation.
3215     *
3216     * @param value
3217     *            the character to convert.
3218     * @return the character converted to a string.
3219     */
3220    public static String toString(char value) {
3221        return String.valueOf(value);
3222    }
3223
3224    /**
3225     * Returns the title case equivalent for the specified character if it
3226     * exists. Otherwise, the specified character is returned unchanged.
3227     *
3228     * @param c
3229     *            the character to convert.
3230     * @return the title case equivalent of {@code c} if it exists, otherwise
3231     *         {@code c}.
3232     */
3233    public static char toTitleCase(char c) {
3234        return (char) toTitleCaseImpl(c);
3235    }
3236
3237    /**
3238     * Returns the title case equivalent for the specified code point if it
3239     * exists. Otherwise, the specified code point is returned unchanged.
3240     *
3241     * @param codePoint
3242     *            the code point to convert.
3243     * @return the title case equivalent of {@code codePoint} if it exists,
3244     *         otherwise {@code codePoint}.
3245     */
3246    public static int toTitleCase(int codePoint) {
3247        return toTitleCaseImpl(codePoint);
3248    }
3249
3250    private static native int toTitleCaseImpl(int codePoint);
3251
3252    /**
3253     * Returns the upper case equivalent for the specified character if the
3254     * character is a lower case letter. Otherwise, the specified character is
3255     * returned unchanged.
3256     *
3257     * @param c
3258     *            the character to convert.
3259     * @return if {@code c} is a lower case character then its upper case
3260     *         counterpart, otherwise just {@code c}.
3261     */
3262    public static char toUpperCase(char c) {
3263        return (char) toUpperCase((int) c);
3264    }
3265
3266    /**
3267     * Returns the upper case equivalent for the specified code point if the
3268     * code point is a lower case letter. Otherwise, the specified code point is
3269     * returned unchanged.
3270     *
3271     * @param codePoint
3272     *            the code point to convert.
3273     * @return if {@code codePoint} is a lower case character then its upper
3274     *         case counterpart, otherwise just {@code codePoint}.
3275     */
3276    public static int toUpperCase(int codePoint) {
3277        // Optimized case for ASCII
3278        if ('a' <= codePoint && codePoint <= 'z') {
3279            return (char) (codePoint - ('a' - 'A'));
3280        }
3281        if (codePoint < 181) {
3282            return codePoint;
3283        }
3284        return toUpperCaseImpl(codePoint);
3285    }
3286
3287    private static native int toUpperCaseImpl(int codePoint);
3288}
3289