Character.java revision 8ffa0b68c9fd3f722bee2bcd94b1d38151a0791d
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21import java.util.Arrays;
22
23/**
24 * The wrapper for the primitive type {@code char}. This class also provides a
25 * number of utility methods for working with characters.
26 *
27 * <p>Character data is kept up to date as Unicode evolves.
28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of
29 * the {@code Locale} documentation for details of the Unicode versions implemented by current
30 * and historical Android releases.
31 *
32 * <p>The Unicode specification, character tables, and other information are available at
33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.
34 *
35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid
36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
39 * encoding and {@code char} pairs are used to represent code points in the
40 * supplementary range. A pair of {@code char} values that represent a
41 * supplementary character are made up of a <i>high surrogate</i> with a value
42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
43 * 0xDC00 to 0xDFFF.
44 * <p>
45 * On the Java platform a {@code char} value represents either a single BMP code
46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
47 * is used to represent all Unicode code points.
48 *
49 * <a name="unicode_categories"><h3>Unicode categories</h3></a>
50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant,
51 * grouped semantically to provide a convenient overview. This table is also useful in
52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}.
53 * <span class="datatable">
54 * <style type="text/css">
55 * .datatable td { padding-right: 20px; }
56 * </style>
57 * <p><table>
58 * <tr> <td> Cn </td> <td> Unassigned </td>  <td>{@link #UNASSIGNED}</td> </tr>
59 * <tr> <td> Cc </td> <td> Control </td>     <td>{@link #CONTROL}</td> </tr>
60 * <tr> <td> Cf </td> <td> Format </td>      <td>{@link #FORMAT}</td> </tr>
61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr>
62 * <tr> <td> Cs </td> <td> Surrogate </td>   <td>{@link #SURROGATE}</td> </tr>
63 * <tr> <td><br></td> </tr>
64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr>
65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr>
66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr>
67 * <tr> <td> Lm </td> <td> Modifier letter </td>  <td>{@link #MODIFIER_LETTER}</td> </tr>
68 * <tr> <td> Lo </td> <td> Other letter </td>     <td>{@link #OTHER_LETTER}</td> </tr>
69 * <tr> <td><br></td> </tr>
70 * <tr> <td> Mn </td> <td> Non-spacing mark </td>       <td>{@link #NON_SPACING_MARK}</td> </tr>
71 * <tr> <td> Me </td> <td> Enclosing mark </td>         <td>{@link #ENCLOSING_MARK}</td> </tr>
72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr>
73 * <tr> <td><br></td> </tr>
74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr>
75 * <tr> <td> Nl </td> <td> Letter number </td>        <td>{@link #LETTER_NUMBER}</td> </tr>
76 * <tr> <td> No </td> <td> Other number </td>         <td>{@link #OTHER_NUMBER}</td> </tr>
77 * <tr> <td><br></td> </tr>
78 * <tr> <td> Pd </td> <td> Dash punctuation </td>          <td>{@link #DASH_PUNCTUATION}</td> </tr>
79 * <tr> <td> Ps </td> <td> Start punctuation </td>         <td>{@link #START_PUNCTUATION}</td> </tr>
80 * <tr> <td> Pe </td> <td> End punctuation </td>           <td>{@link #END_PUNCTUATION}</td> </tr>
81 * <tr> <td> Pc </td> <td> Connector punctuation </td>     <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr>
82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr>
83 * <tr> <td> Pf </td> <td> Final quote punctuation </td>   <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr>
84 * <tr> <td> Po </td> <td> Other punctuation </td>         <td>{@link #OTHER_PUNCTUATION}</td> </tr>
85 * <tr> <td><br></td> </tr>
86 * <tr> <td> Sm </td> <td> Math symbol </td>     <td>{@link #MATH_SYMBOL}</td> </tr>
87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr>
88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr>
89 * <tr> <td> So </td> <td> Other symbol </td>    <td>{@link #OTHER_SYMBOL}</td> </tr>
90 * <tr> <td><br></td> </tr>
91 * <tr> <td> Zs </td> <td> Space separator </td>     <td>{@link #SPACE_SEPARATOR}</td> </tr>
92 * <tr> <td> Zl </td> <td> Line separator </td>      <td>{@link #LINE_SEPARATOR}</td> </tr>
93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr>
94 * </table>
95 * </span>
96 *
97 * @since 1.0
98 */
99@FindBugsSuppressWarnings("DM_NUMBER_CTOR")
100public final class Character implements Serializable, Comparable<Character> {
101    private static final long serialVersionUID = 3786198910865385080L;
102
103    private final char value;
104
105    /**
106     * The minimum {@code Character} value.
107     */
108    public static final char MIN_VALUE = '\u0000';
109
110    /**
111     * The maximum {@code Character} value.
112     */
113    public static final char MAX_VALUE = '\uffff';
114
115    /**
116     * The minimum radix used for conversions between characters and integers.
117     */
118    public static final int MIN_RADIX = 2;
119
120    /**
121     * The maximum radix used for conversions between characters and integers.
122     */
123    public static final int MAX_RADIX = 36;
124
125    /**
126     * The {@link Class} object that represents the primitive type {@code char}.
127     */
128    @SuppressWarnings("unchecked")
129    public static final Class<Character> TYPE
130            = (Class<Character>) char[].class.getComponentType();
131    // Note: Character.TYPE can't be set to "char.class", since *that* is
132    // defined to be "java.lang.Character.TYPE";
133
134    /**
135     * Unicode category constant Cn.
136     */
137    public static final byte UNASSIGNED = 0;
138
139    /**
140     * Unicode category constant Lu.
141     */
142    public static final byte UPPERCASE_LETTER = 1;
143
144    /**
145     * Unicode category constant Ll.
146     */
147    public static final byte LOWERCASE_LETTER = 2;
148
149    /**
150     * Unicode category constant Lt.
151     */
152    public static final byte TITLECASE_LETTER = 3;
153
154    /**
155     * Unicode category constant Lm.
156     */
157    public static final byte MODIFIER_LETTER = 4;
158
159    /**
160     * Unicode category constant Lo.
161     */
162    public static final byte OTHER_LETTER = 5;
163
164    /**
165     * Unicode category constant Mn.
166     */
167    public static final byte NON_SPACING_MARK = 6;
168
169    /**
170     * Unicode category constant Me.
171     */
172    public static final byte ENCLOSING_MARK = 7;
173
174    /**
175     * Unicode category constant Mc.
176     */
177    public static final byte COMBINING_SPACING_MARK = 8;
178
179    /**
180     * Unicode category constant Nd.
181     */
182    public static final byte DECIMAL_DIGIT_NUMBER = 9;
183
184    /**
185     * Unicode category constant Nl.
186     */
187    public static final byte LETTER_NUMBER = 10;
188
189    /**
190     * Unicode category constant No.
191     */
192    public static final byte OTHER_NUMBER = 11;
193
194    /**
195     * Unicode category constant Zs.
196     */
197    public static final byte SPACE_SEPARATOR = 12;
198
199    /**
200     * Unicode category constant Zl.
201     */
202    public static final byte LINE_SEPARATOR = 13;
203
204    /**
205     * Unicode category constant Zp.
206     */
207    public static final byte PARAGRAPH_SEPARATOR = 14;
208
209    /**
210     * Unicode category constant Cc.
211     */
212    public static final byte CONTROL = 15;
213
214    /**
215     * Unicode category constant Cf.
216     */
217    public static final byte FORMAT = 16;
218
219    /**
220     * Unicode category constant Co.
221     */
222    public static final byte PRIVATE_USE = 18;
223
224    /**
225     * Unicode category constant Cs.
226     */
227    public static final byte SURROGATE = 19;
228
229    /**
230     * Unicode category constant Pd.
231     */
232    public static final byte DASH_PUNCTUATION = 20;
233
234    /**
235     * Unicode category constant Ps.
236     */
237    public static final byte START_PUNCTUATION = 21;
238
239    /**
240     * Unicode category constant Pe.
241     */
242    public static final byte END_PUNCTUATION = 22;
243
244    /**
245     * Unicode category constant Pc.
246     */
247    public static final byte CONNECTOR_PUNCTUATION = 23;
248
249    /**
250     * Unicode category constant Po.
251     */
252    public static final byte OTHER_PUNCTUATION = 24;
253
254    /**
255     * Unicode category constant Sm.
256     */
257    public static final byte MATH_SYMBOL = 25;
258
259    /**
260     * Unicode category constant Sc.
261     */
262    public static final byte CURRENCY_SYMBOL = 26;
263
264    /**
265     * Unicode category constant Sk.
266     */
267    public static final byte MODIFIER_SYMBOL = 27;
268
269    /**
270     * Unicode category constant So.
271     */
272    public static final byte OTHER_SYMBOL = 28;
273
274    /**
275     * Unicode category constant Pi.
276     *
277     * @since 1.4
278     */
279    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
280
281    /**
282     * Unicode category constant Pf.
283     *
284     * @since 1.4
285     */
286    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
287
288    /**
289     * Unicode bidirectional constant denoting an undefined directionality.
290     *
291     * @since 1.4
292     */
293    public static final byte DIRECTIONALITY_UNDEFINED = -1;
294
295    /**
296     * Unicode bidirectional constant L.
297     *
298     * @since 1.4
299     */
300    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
301
302    /**
303     * Unicode bidirectional constant R.
304     *
305     * @since 1.4
306     */
307    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
308
309    /**
310     * Unicode bidirectional constant AL.
311     *
312     * @since 1.4
313     */
314    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
315
316    /**
317     * Unicode bidirectional constant EN.
318     *
319     * @since 1.4
320     */
321    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
322
323    /**
324     * Unicode bidirectional constant ES.
325     *
326     * @since 1.4
327     */
328    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
329
330    /**
331     * Unicode bidirectional constant ET.
332     *
333     * @since 1.4
334     */
335    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
336
337    /**
338     * Unicode bidirectional constant AN.
339     *
340     * @since 1.4
341     */
342    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
343
344    /**
345     * Unicode bidirectional constant CS.
346     *
347     * @since 1.4
348     */
349    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
350
351    /**
352     * Unicode bidirectional constant NSM.
353     *
354     * @since 1.4
355     */
356    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
357
358    /**
359     * Unicode bidirectional constant BN.
360     *
361     * @since 1.4
362     */
363    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
364
365    /**
366     * Unicode bidirectional constant B.
367     *
368     * @since 1.4
369     */
370    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
371
372    /**
373     * Unicode bidirectional constant S.
374     *
375     * @since 1.4
376     */
377    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
378
379    /**
380     * Unicode bidirectional constant WS.
381     *
382     * @since 1.4
383     */
384    public static final byte DIRECTIONALITY_WHITESPACE = 12;
385
386    /**
387     * Unicode bidirectional constant ON.
388     *
389     * @since 1.4
390     */
391    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
392
393    /**
394     * Unicode bidirectional constant LRE.
395     *
396     * @since 1.4
397     */
398    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
399
400    /**
401     * Unicode bidirectional constant LRO.
402     *
403     * @since 1.4
404     */
405    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
406
407    /**
408     * Unicode bidirectional constant RLE.
409     *
410     * @since 1.4
411     */
412    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
413
414    /**
415     * Unicode bidirectional constant RLO.
416     *
417     * @since 1.4
418     */
419    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
420
421    /**
422     * Unicode bidirectional constant PDF.
423     *
424     * @since 1.4
425     */
426    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
427
428    /**
429     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
430     * encoding, {@code '\uD800'}.
431     *
432     * @since 1.5
433     */
434    public static final char MIN_HIGH_SURROGATE = '\uD800';
435
436    /**
437     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
438     * encoding, {@code '\uDBFF'}.
439     *
440     * @since 1.5
441     */
442    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
443
444    /**
445     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
446     * encoding, {@code '\uDC00'}.
447     *
448     * @since 1.5
449     */
450    public static final char MIN_LOW_SURROGATE = '\uDC00';
451
452    /**
453     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
454     * encoding, {@code '\uDFFF'}.
455     *
456     * @since 1.5
457     */
458    public static final char MAX_LOW_SURROGATE = '\uDFFF';
459
460    /**
461     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
462     *
463     * @since 1.5
464     */
465    public static final char MIN_SURROGATE = '\uD800';
466
467    /**
468     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
469     *
470     * @since 1.5
471     */
472    public static final char MAX_SURROGATE = '\uDFFF';
473
474    /**
475     * The minimum value of a supplementary code point, {@code U+010000}.
476     *
477     * @since 1.5
478     */
479    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
480
481    /**
482     * The minimum code point value, {@code U+0000}.
483     *
484     * @since 1.5
485     */
486    public static final int MIN_CODE_POINT = 0x000000;
487
488    /**
489     * The maximum code point value, {@code U+10FFFF}.
490     *
491     * @since 1.5
492     */
493    public static final int MAX_CODE_POINT = 0x10FFFF;
494
495    /**
496     * The number of bits required to represent a {@code Character} value in
497     * unsigned form.
498     *
499     * @since 1.5
500     */
501    public static final int SIZE = 16;
502
503    private static final byte[] DIRECTIONALITY = new byte[] {
504            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
505            DIRECTIONALITY_EUROPEAN_NUMBER,
506            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
507            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
508            DIRECTIONALITY_ARABIC_NUMBER,
509            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
510            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
511            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
512            DIRECTIONALITY_OTHER_NEUTRALS,
513            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
514            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
515            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
516            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
517            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
518            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
519            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
520
521    /*
522     * Represents a subset of the Unicode character set.
523     */
524    public static class Subset {
525        String name;
526
527        /**
528         * Constructs a new {@code Subset}.
529         *
530         * @param string
531         *            this subset's name.
532         */
533        protected Subset(String string) {
534            if (string == null) {
535                throw new NullPointerException("string == null");
536            }
537            name = string;
538        }
539
540        /**
541         * Compares this character subset with the specified object. Uses
542         * {@link java.lang.Object#equals(Object)} to do the comparison.
543         *
544         * @param object
545         *            the object to compare this character subset with.
546         * @return {@code true} if {@code object} is this subset, that is, if
547         *         {@code object == this}; {@code false} otherwise.
548         */
549        @Override
550        public final boolean equals(Object object) {
551            return super.equals(object);
552        }
553
554        /**
555         * Returns the integer hash code for this character subset.
556         *
557         * @return this subset's hash code, which is the hash code computed by
558         *         {@link java.lang.Object#hashCode()}.
559         */
560        @Override
561        public final int hashCode() {
562            return super.hashCode();
563        }
564
565        /**
566         * Returns the string representation of this subset.
567         *
568         * @return this subset's name.
569         */
570        @Override
571        public final String toString() {
572            return name;
573        }
574    }
575
576    /**
577     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
578     * specification.
579     *
580     * @since 1.2
581     */
582    public static final class UnicodeBlock extends Subset {
583        /**
584         * The &quot;Surrogates Area&quot; Unicode Block.
585         *
586         * @deprecated As of Java 5, this block has been replaced by
587         *             {@link #HIGH_SURROGATES},
588         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
589         *             {@link #LOW_SURROGATES}.
590         */
591        @Deprecated
592        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
593        /**
594         * The &quot;Basic Latin&quot; Unicode Block.
595         *
596         * @since 1.2
597         */
598        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
599        /**
600         * The &quot;Latin-1 Supplement&quot; Unicode Block.
601         *
602         * @since 1.2
603         */
604        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
605        /**
606         * The &quot;Latin Extended-A&quot; Unicode Block.
607         *
608         * @since 1.2
609         */
610        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
611        /**
612         * The &quot;Latin Extended-B&quot; Unicode Block.
613         *
614         * @since 1.2
615         */
616        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
617        /**
618         * The &quot;IPA Extensions&quot; Unicode Block.
619         *
620         * @since 1.2
621         */
622        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
623        /**
624         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
625         *
626         * @since 1.2
627         */
628        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
629        /**
630         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
631         *
632         * @since 1.2
633         */
634        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
635        /**
636         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
637         * to as &quot;Greek&quot;.
638         *
639         * @since 1.2
640         */
641        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
642        /**
643         * The &quot;Cyrillic&quot; Unicode Block.
644         *
645         * @since 1.2
646         */
647        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
648        /**
649         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
650         * referred to as &quot;Cyrillic Supplementary&quot;.
651         *
652         * @since 1.5
653         */
654        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
655        /**
656         * The &quot;Armenian&quot; Unicode Block.
657         *
658         * @since 1.2
659         */
660        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
661        /**
662         * The &quot;Hebrew&quot; Unicode Block.
663         *
664         * @since 1.2
665         */
666        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
667        /**
668         * The &quot;Arabic&quot; Unicode Block.
669         *
670         * @since 1.2
671         */
672        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
673        /**
674         * The &quot;Syriac&quot; Unicode Block.
675         *
676         * @since 1.4
677         */
678        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
679        /**
680         * The &quot;Thaana&quot; Unicode Block.
681         *
682         * @since 1.4
683         */
684        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
685        /**
686         * The &quot;Devanagari&quot; Unicode Block.
687         *
688         * @since 1.2
689         */
690        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
691        /**
692         * The &quot;Bengali&quot; Unicode Block.
693         *
694         * @since 1.2
695         */
696        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
697        /**
698         * The &quot;Gurmukhi&quot; Unicode Block.
699         *
700         * @since 1.2
701         */
702        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
703        /**
704         * The &quot;Gujarati&quot; Unicode Block.
705         *
706         * @since 1.2
707         */
708        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
709        /**
710         * The &quot;Oriya&quot; Unicode Block.
711         *
712         * @since 1.2
713         */
714        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
715        /**
716         * The &quot;Tamil&quot; Unicode Block.
717         *
718         * @since 1.2
719         */
720        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
721        /**
722         * The &quot;Telugu&quot; Unicode Block.
723         *
724         * @since 1.2
725         */
726        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
727        /**
728         * The &quot;Kannada&quot; Unicode Block.
729         *
730         * @since 1.2
731         */
732        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
733        /**
734         * The &quot;Malayalam&quot; Unicode Block.
735         *
736         * @since 1.2
737         */
738        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
739        /**
740         * The &quot;Sinhala&quot; Unicode Block.
741         *
742         * @since 1.4
743         */
744        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
745        /**
746         * The &quot;Thai&quot; Unicode Block.
747         *
748         * @since 1.2
749         */
750        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
751        /**
752         * The &quot;Lao&quot; Unicode Block.
753         *
754         * @since 1.2
755         */
756        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
757        /**
758         * The &quot;Tibetan&quot; Unicode Block.
759         *
760         * @since 1.2
761         */
762        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
763        /**
764         * The &quot;Myanmar&quot; Unicode Block.
765         *
766         * @since 1.4
767         */
768        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
769        /**
770         * The &quot;Georgian&quot; Unicode Block.
771         *
772         * @since 1.2
773         */
774        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
775        /**
776         * The &quot;Hangul Jamo&quot; Unicode Block.
777         *
778         * @since 1.2
779         */
780        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
781        /**
782         * The &quot;Ethiopic&quot; Unicode Block.
783         *
784         * @since 1.4
785         */
786        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
787        /**
788         * The &quot;Cherokee&quot; Unicode Block.
789         *
790         * @since 1.4
791         */
792        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
793        /**
794         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
795         *
796         * @since 1.4
797         */
798        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
799        /**
800         * The &quot;Ogham&quot; Unicode Block.
801         *
802         * @since 1.4
803         */
804        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
805        /**
806         * The &quot;Runic&quot; Unicode Block.
807         *
808         * @since 1.4
809         */
810        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
811        /**
812         * The &quot;Tagalog&quot; Unicode Block.
813         *
814         * @since 1.5
815         */
816        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
817        /**
818         * The &quot;Hanunoo&quot; Unicode Block.
819         *
820         * @since 1.5
821         */
822        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
823        /**
824         * The &quot;Buhid&quot; Unicode Block.
825         *
826         * @since 1.5
827         */
828        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
829        /**
830         * The &quot;Tagbanwa&quot; Unicode Block.
831         *
832         * @since 1.5
833         */
834        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
835        /**
836         * The &quot;Khmer&quot; Unicode Block.
837         *
838         * @since 1.4
839         */
840        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
841        /**
842         * The &quot;Mongolian&quot; Unicode Block.
843         *
844         * @since 1.4
845         */
846        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
847        /**
848         * The &quot;Limbu&quot; Unicode Block.
849         *
850         * @since 1.5
851         */
852        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
853        /**
854         * The &quot;Tai Le&quot; Unicode Block.
855         *
856         * @since 1.5
857         */
858        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
859        /**
860         * The &quot;Khmer Symbols&quot; Unicode Block.
861         *
862         * @since 1.5
863         */
864        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
865        /**
866         * The &quot;Phonetic Extensions&quot; Unicode Block.
867         *
868         * @since 1.5
869         */
870        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
871        /**
872         * The &quot;Latin Extended Additional&quot; Unicode Block.
873         *
874         * @since 1.2
875         */
876        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
877        /**
878         * The &quot;Greek Extended&quot; Unicode Block.
879         *
880         * @since 1.2
881         */
882        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
883        /**
884         * The &quot;General Punctuation&quot; Unicode Block.
885         *
886         * @since 1.2
887         */
888        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
889        /**
890         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
891         *
892         * @since 1.2
893         */
894        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
895        /**
896         * The &quot;Currency Symbols&quot; Unicode Block.
897         *
898         * @since 1.2
899         */
900        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
901        /**
902         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
903         * Block. Previously referred to as &quot;Combining Marks for
904         * Symbols&quot;.
905         *
906         * @since 1.2
907         */
908        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
909        /**
910         * The &quot;Letterlike Symbols&quot; Unicode Block.
911         *
912         * @since 1.2
913         */
914        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
915        /**
916         * The &quot;Number Forms&quot; Unicode Block.
917         *
918         * @since 1.2
919         */
920        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
921        /**
922         * The &quot;Arrows&quot; Unicode Block.
923         *
924         * @since 1.2
925         */
926        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
927        /**
928         * The &quot;Mathematical Operators&quot; Unicode Block.
929         *
930         * @since 1.2
931         */
932        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
933        /**
934         * The &quot;Miscellaneous Technical&quot; Unicode Block.
935         *
936         * @since 1.2
937         */
938        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
939        /**
940         * The &quot;Control Pictures&quot; Unicode Block.
941         *
942         * @since 1.2
943         */
944        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
945        /**
946         * The &quot;Optical Character Recognition&quot; Unicode Block.
947         *
948         * @since 1.2
949         */
950        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
951        /**
952         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
953         *
954         * @since 1.2
955         */
956        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
957        /**
958         * The &quot;Box Drawing&quot; Unicode Block.
959         *
960         * @since 1.2
961         */
962        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
963        /**
964         * The &quot;Block Elements&quot; Unicode Block.
965         *
966         * @since 1.2
967         */
968        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
969        /**
970         * The &quot;Geometric Shapes&quot; Unicode Block.
971         *
972         * @since 1.2
973         */
974        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
975        /**
976         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
977         *
978         * @since 1.2
979         */
980        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
981        /**
982         * The &quot;Dingbats&quot; Unicode Block.
983         *
984         * @since 1.2
985         */
986        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
987        /**
988         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
989         *
990         * @since 1.5
991         */
992        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
993        /**
994         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
995         *
996         * @since 1.5
997         */
998        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
999        /**
1000         * The &quot;Braille Patterns&quot; Unicode Block.
1001         *
1002         * @since 1.4
1003         */
1004        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1005        /**
1006         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1007         *
1008         * @since 1.5
1009         */
1010        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1011        /**
1012         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1013         *
1014         * @since 1.5
1015         */
1016        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1017        /**
1018         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1019         *
1020         * @since 1.5
1021         */
1022        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
        /**
         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
         *
         * @since 1.5
         */
1028        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1029        /**
1030         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1031         *
1032         * @since 1.4
1033         */
1034        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1035        /**
1036         * The &quot;Kangxi Radicals&quot; Unicode Block.
1037         *
1038         * @since 1.4
1039         */
1040        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1041        /**
1042         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1043         *
1044         * @since 1.4
1045         */
1046        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1047        /**
1048         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1049         *
1050         * @since 1.2
1051         */
1052        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1053        /**
1054         * The &quot;Hiragana&quot; Unicode Block.
1055         *
1056         * @since 1.2
1057         */
1058        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1059        /**
1060         * The &quot;Katakana&quot; Unicode Block.
1061         *
1062         * @since 1.2
1063         */
1064        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1065        /**
1066         * The &quot;Bopomofo&quot; Unicode Block.
1067         *
1068         * @since 1.2
1069         */
1070        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1071        /**
1072         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1073         *
1074         * @since 1.2
1075         */
1076        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1077        /**
1078         * The &quot;Kanbun&quot; Unicode Block.
1079         *
1080         * @since 1.2
1081         */
1082        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1083        /**
1084         * The &quot;Bopomofo Extended&quot; Unicode Block.
1085         *
1086         * @since 1.4
1087         */
1088        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1089        /**
1090         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1091         *
1092         * @since 1.5
1093         */
1094        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1095        /**
1096         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1097         *
1098         * @since 1.2
1099         */
1100        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1101        /**
1102         * The &quot;CJK Compatibility&quot; Unicode Block.
1103         *
1104         * @since 1.2
1105         */
1106        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1107        /**
1108         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1109         *
1110         * @since 1.4
1111         */
1112        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1113        /**
1114         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1115         *
1116         * @since 1.5
1117         */
1118        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1119        /**
1120         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1121         *
1122         * @since 1.2
1123         */
1124        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1125        /**
1126         * The &quot;Yi Syllables&quot; Unicode Block.
1127         *
1128         * @since 1.4
1129         */
1130        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1131        /**
1132         * The &quot;Yi Radicals&quot; Unicode Block.
1133         *
1134         * @since 1.4
1135         */
1136        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1137        /**
1138         * The &quot;Hangul Syllables&quot; Unicode Block.
1139         *
1140         * @since 1.2
1141         */
1142        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
        /**
         * The &quot;High Surrogates&quot; Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         *
         * @since 1.5
         */
1147        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
        /**
         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         *
         * @since 1.5
         */
1153        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
        /**
         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         *
         * @since 1.5
         */
1158        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1159        /**
1160         * The &quot;Private Use Area&quot; Unicode Block.
1161         *
1162         * @since 1.2
1163         */
1164        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1165        /**
1166         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1167         *
1168         * @since 1.2
1169         */
1170        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1171        /**
1172         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1173         *
1174         * @since 1.2
1175         */
1176        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1177        /**
1178         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1179         *
1180         * @since 1.2
1181         */
1182        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1183        /**
1184         * The &quot;Variation Selectors&quot; Unicode Block.
1185         *
1186         * @since 1.5
1187         */
1188        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1189        /**
1190         * The &quot;Combining Half Marks&quot; Unicode Block.
1191         *
1192         * @since 1.2
1193         */
1194        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1195        /**
1196         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1197         *
1198         * @since 1.2
1199         */
1200        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1201        /**
1202         * The &quot;Small Form Variants&quot; Unicode Block.
1203         *
1204         * @since 1.2
1205         */
1206        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1207        /**
1208         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1209         *
1210         * @since 1.2
1211         */
1212        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1213        /**
1214         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1215         *
1216         * @since 1.2
1217         */
1218        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1219        /**
1220         * The &quot;Specials&quot; Unicode Block.
1221         *
1222         * @since 1.2
1223         */
1224        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
        /**
         * The &quot;Linear B Syllabary&quot; Unicode Block.
         *
         * @since 1.5
         */
1230        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1231        /**
1232         * The &quot;Linear B Ideograms&quot; Unicode Block.
1233         *
1234         * @since 1.5
1235         */
1236        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1237        /**
1238         * The &quot;Aegean Numbers&quot; Unicode Block.
1239         *
1240         * @since 1.5
1241         */
1242        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1243        /**
1244         * The &quot;Old Italic&quot; Unicode Block.
1245         *
1246         * @since 1.5
1247         */
1248        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1249        /**
1250         * The &quot;Gothic&quot; Unicode Block.
1251         *
1252         * @since 1.5
1253         */
1254        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1255        /**
1256         * The &quot;Ugaritic&quot; Unicode Block.
1257         *
1258         * @since 1.5
1259         */
1260        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1261        /**
1262         * The &quot;Deseret&quot; Unicode Block.
1263         *
1264         * @since 1.5
1265         */
1266        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1267        /**
1268         * The &quot;Shavian&quot; Unicode Block.
1269         *
1270         * @since 1.5
1271         */
1272        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1273        /**
1274         * The &quot;Osmanya&quot; Unicode Block.
1275         *
1276         * @since 1.5
1277         */
1278        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1279        /**
1280         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1281         *
1282         * @since 1.5
1283         */
1284        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1285        /**
1286         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1287         *
1288         * @since 1.5
1289         */
1290        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1291        /**
1292         * The &quot;Musical Symbols&quot; Unicode Block.
1293         *
1294         * @since 1.5
1295         */
1296        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1297        /**
1298         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1299         *
1300         * @since 1.5
1301         */
1302        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1303        /**
1304         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1305         *
1306         * @since 1.5
1307         */
1308        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1309        /**
1310         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1311         *
1312         * @since 1.5
1313         */
1314        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1315        /**
1316         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1317         *
1318         * @since 1.5
1319         */
1320        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1321        /**
1322         * The &quot;Tags&quot; Unicode Block.
1323         *
1324         * @since 1.5
1325         */
1326        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1327        /**
1328         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1329         *
1330         * @since 1.5
1331         */
1332        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1333        /**
1334         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1335         *
1336         * @since 1.5
1337         */
1338        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1339        /**
1340         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1341         *
1342         * @since 1.5
1343         */
1344        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1345
1346        /*
1347         * All of the UnicodeBlocks with valid ranges in ascending order.
1348         */
1349        private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] {
1350            null,
1351            UnicodeBlock.BASIC_LATIN,
1352            UnicodeBlock.LATIN_1_SUPPLEMENT,
1353            UnicodeBlock.LATIN_EXTENDED_A,
1354            UnicodeBlock.LATIN_EXTENDED_B,
1355            UnicodeBlock.IPA_EXTENSIONS,
1356            UnicodeBlock.SPACING_MODIFIER_LETTERS,
1357            UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
1358            UnicodeBlock.GREEK,
1359            UnicodeBlock.CYRILLIC,
1360            UnicodeBlock.ARMENIAN,
1361            UnicodeBlock.HEBREW,
1362            UnicodeBlock.ARABIC,
1363            UnicodeBlock.SYRIAC,
1364            UnicodeBlock.THAANA,
1365            UnicodeBlock.DEVANAGARI,
1366            UnicodeBlock.BENGALI,
1367            UnicodeBlock.GURMUKHI,
1368            UnicodeBlock.GUJARATI,
1369            UnicodeBlock.ORIYA,
1370            UnicodeBlock.TAMIL,
1371            UnicodeBlock.TELUGU,
1372            UnicodeBlock.KANNADA,
1373            UnicodeBlock.MALAYALAM,
1374            UnicodeBlock.SINHALA,
1375            UnicodeBlock.THAI,
1376            UnicodeBlock.LAO,
1377            UnicodeBlock.TIBETAN,
1378            UnicodeBlock.MYANMAR,
1379            UnicodeBlock.GEORGIAN,
1380            UnicodeBlock.HANGUL_JAMO,
1381            UnicodeBlock.ETHIOPIC,
1382            UnicodeBlock.CHEROKEE,
1383            UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1384            UnicodeBlock.OGHAM,
1385            UnicodeBlock.RUNIC,
1386            UnicodeBlock.KHMER,
1387            UnicodeBlock.MONGOLIAN,
1388            UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
1389            UnicodeBlock.GREEK_EXTENDED,
1390            UnicodeBlock.GENERAL_PUNCTUATION,
1391            UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS,
1392            UnicodeBlock.CURRENCY_SYMBOLS,
1393            UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS,
1394            UnicodeBlock.LETTERLIKE_SYMBOLS,
1395            UnicodeBlock.NUMBER_FORMS,
1396            UnicodeBlock.ARROWS,
1397            UnicodeBlock.MATHEMATICAL_OPERATORS,
1398            UnicodeBlock.MISCELLANEOUS_TECHNICAL,
1399            UnicodeBlock.CONTROL_PICTURES,
1400            UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION,
1401            UnicodeBlock.ENCLOSED_ALPHANUMERICS,
1402            UnicodeBlock.BOX_DRAWING,
1403            UnicodeBlock.BLOCK_ELEMENTS,
1404            UnicodeBlock.GEOMETRIC_SHAPES,
1405            UnicodeBlock.MISCELLANEOUS_SYMBOLS,
1406            UnicodeBlock.DINGBATS,
1407            UnicodeBlock.BRAILLE_PATTERNS,
1408            UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
1409            UnicodeBlock.KANGXI_RADICALS,
1410            UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1411            UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
1412            UnicodeBlock.HIRAGANA,
1413            UnicodeBlock.KATAKANA,
1414            UnicodeBlock.BOPOMOFO,
1415            UnicodeBlock.HANGUL_COMPATIBILITY_JAMO,
1416            UnicodeBlock.KANBUN,
1417            UnicodeBlock.BOPOMOFO_EXTENDED,
1418            UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS,
1419            UnicodeBlock.CJK_COMPATIBILITY,
1420            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1421            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
1422            UnicodeBlock.YI_SYLLABLES,
1423            UnicodeBlock.YI_RADICALS,
1424            UnicodeBlock.HANGUL_SYLLABLES,
1425            UnicodeBlock.HIGH_SURROGATES,
1426            UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES,
1427            UnicodeBlock.LOW_SURROGATES,
1428            UnicodeBlock.PRIVATE_USE_AREA,
1429            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
1430            UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS,
1431            UnicodeBlock.ARABIC_PRESENTATION_FORMS_A,
1432            UnicodeBlock.COMBINING_HALF_MARKS,
1433            UnicodeBlock.CJK_COMPATIBILITY_FORMS,
1434            UnicodeBlock.SMALL_FORM_VARIANTS,
1435            UnicodeBlock.ARABIC_PRESENTATION_FORMS_B,
1436            UnicodeBlock.SPECIALS,
1437            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
1438            UnicodeBlock.OLD_ITALIC,
1439            UnicodeBlock.GOTHIC,
1440            UnicodeBlock.DESERET,
1441            UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS,
1442            UnicodeBlock.MUSICAL_SYMBOLS,
1443            UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1444            UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1445            UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1446            UnicodeBlock.TAGS,
1447            UnicodeBlock.CYRILLIC_SUPPLEMENTARY,
1448            UnicodeBlock.TAGALOG,
1449            UnicodeBlock.HANUNOO,
1450            UnicodeBlock.BUHID,
1451            UnicodeBlock.TAGBANWA,
1452            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1453            UnicodeBlock.SUPPLEMENTAL_ARROWS_A,
1454            UnicodeBlock.SUPPLEMENTAL_ARROWS_B,
1455            UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1456            UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1457            UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
1458            UnicodeBlock.VARIATION_SELECTORS,
1459            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1460            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1461            UnicodeBlock.LIMBU,
1462            UnicodeBlock.TAI_LE,
1463            UnicodeBlock.KHMER_SYMBOLS,
1464            UnicodeBlock.PHONETIC_EXTENSIONS,
1465            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1466            UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS,
1467            UnicodeBlock.LINEAR_B_SYLLABARY,
1468            UnicodeBlock.LINEAR_B_IDEOGRAMS,
1469            UnicodeBlock.AEGEAN_NUMBERS,
1470            UnicodeBlock.UGARITIC,
1471            UnicodeBlock.SHAVIAN,
1472            UnicodeBlock.OSMANYA,
1473            UnicodeBlock.CYPRIOT_SYLLABARY,
1474            UnicodeBlock.TAI_XUAN_JING_SYMBOLS,
1475            UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT
1476        };
1477
1478        /**
1479         * Retrieves the constant that corresponds to the specified block name.
1480         * The block names are defined by the Unicode 4.0.1 specification in the
1481         * {@code Blocks-4.0.1.txt} file.
1482         * <p>
1483         * Block names may be one of the following:
1484         * <ul>
1485         * <li>Canonical block name, as defined by the Unicode specification;
1486         * case-insensitive.</li>
1487         * <li>Canonical block name without any spaces, as defined by the
1488         * Unicode specification; case-insensitive.</li>
1489         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1490         * uppercasing the canonical name and replacing all spaces and hyphens
1491         * with underscores.</li>
1492         * </ul>
1493         *
1494         * @param blockName
1495         *            the name of the block to retrieve.
1496         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1497         * @throws NullPointerException
1498         *             if {@code blockName} is {@code null}.
1499         * @throws IllegalArgumentException
1500         *             if {@code blockName} is not a valid block name.
1501         * @since 1.5
1502         */
1503        public static UnicodeBlock forName(String blockName) {
1504            if (blockName == null) {
1505                throw new NullPointerException("blockName == null");
1506            }
1507            int block = forNameImpl(blockName);
1508            if (block == -1) {
1509                if (blockName.equals("SURROGATES_AREA")) {
1510                    return SURROGATES_AREA;
1511                } else if(blockName.equalsIgnoreCase("greek")) {
1512                    return GREEK;
1513                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1514                        blockName.equals("Combining Marks for Symbols") ||
1515                        blockName.equals("CombiningMarksforSymbols")) {
1516                    return COMBINING_MARKS_FOR_SYMBOLS;
1517                }
1518                throw new IllegalArgumentException("Bad block name: " + blockName);
1519            }
1520            return BLOCKS[block];
1521        }
1522
1523        /**
1524         * Gets the constant for the Unicode block that contains the specified
1525         * character.
1526         *
1527         * @param c
1528         *            the character for which to get the {@code UnicodeBlock}
1529         *            constant.
1530         * @return the {@code UnicodeBlock} constant for the block that contains
1531         *         {@code c}, or {@code null} if {@code c} does not belong to
1532         *         any defined block.
1533         */
1534        public static UnicodeBlock of(char c) {
1535            return of((int) c);
1536        }
1537
1538        /**
1539         * Gets the constant for the Unicode block that contains the specified
1540         * Unicode code point.
1541         *
1542         * @param codePoint
1543         *            the Unicode code point for which to get the
1544         *            {@code UnicodeBlock} constant.
1545         * @return the {@code UnicodeBlock} constant for the block that contains
1546         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1547         *         not belong to any defined block.
1548         * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
1549         * @since 1.5
1550         */
1551        public static UnicodeBlock of(int codePoint) {
1552            checkValidCodePoint(codePoint);
1553            int block = ofImpl(codePoint);
1554            if (block == -1 || block >= BLOCKS.length) {
1555                return null;
1556            }
1557            return BLOCKS[block];
1558        }
1559
        /**
         * Creates a block constant with the given name.
         * <p>
         * Only {@code blockName} is used; it is handed to the superclass
         * constructor. {@code start} and {@code end} are accepted but not
         * stored or read by this constructor.
         *
         * @param blockName
         *            the identifier of the block.
         * @param start
         *            first code point of the block (currently unused).
         * @param end
         *            last code point of the block (currently unused).
         */
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
1563    }
1564
    /**
     * Native lookup of a Unicode block by name.
     * <p>
     * {@code UnicodeBlock.forName} treats a result of {@code -1} as "name not
     * recognized" and any other result as an index into its block table.
     * NOTE(review): the exact set of names accepted depends on the native
     * implementation, which is not visible here.
     *
     * @param blockName
     *            the block name to resolve.
     * @return the block index, or {@code -1} if the name is unknown.
     */
    private static native int forNameImpl(String blockName);
1566
    /**
     * Native lookup of the Unicode block containing a code point.
     * <p>
     * {@code UnicodeBlock.of(int)} validates the code point before calling
     * this method, treats {@code -1} as "no block", and uses any other result
     * as an index into its block table.
     *
     * @param codePoint
     *            the code point to classify.
     * @return the block index, or {@code -1} if there is no block.
     */
    private static native int ofImpl(int codePoint);
1568
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value.
     * <p>
     * This always creates a new object; {@link #valueOf(char)} can return a
     * cached instance instead and is usually the better choice.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     */
    public Character(char value) {
        this.value = value;
    }
1579
    /**
     * Gets the primitive value of this character.
     *
     * @return the {@code char} value wrapped by this object.
     */
    public char charValue() {
        return value;
    }
1588
1589    private static void checkValidCodePoint(int codePoint) {
1590        if (!isValidCodePoint(codePoint)) {
1591            throw new IllegalArgumentException("Invalid code point: " + codePoint);
1592        }
1593    }
1594
1595    /**
1596     * Compares this object to the specified character object to determine their
1597     * relative order.
1598     *
1599     * @param c
1600     *            the character object to compare this object to.
1601     * @return {@code 0} if the value of this character and the value of
1602     *         {@code c} are equal; a positive value if the value of this
1603     *         character is greater than the value of {@code c}; a negative
1604     *         value if the value of this character is less than the value of
1605     *         {@code c}.
1606     * @see java.lang.Comparable
1607     * @since 1.2
1608     */
1609    public int compareTo(Character c) {
1610        return compare(value, c.value);
1611    }
1612
1613    /**
1614     * Compares two {@code char} values.
1615     * @return 0 if lhs = rhs, less than 0 if lhs &lt; rhs, and greater than 0 if lhs &gt; rhs.
1616     * @since 1.7
1617     */
1618    public static int compare(char lhs, char rhs) {
1619        return lhs - rhs;
1620    }
1621
1622    /**
1623     * Returns a {@code Character} instance for the {@code char} value passed.
1624     * <p>
1625     * If it is not necessary to get a new {@code Character} instance, it is
1626     * recommended to use this method instead of the constructor, since it
1627     * maintains a cache of instances which may result in better performance.
1628     *
1629     * @param c
1630     *            the char value for which to get a {@code Character} instance.
1631     * @return the {@code Character} instance for {@code c}.
1632     * @since 1.5
1633     */
1634    public static Character valueOf(char c) {
1635        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1636    }
1637
1638    /**
1639     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1640     */
1641    private static final Character[] SMALL_VALUES = new Character[128];
1642
1643    static {
1644        for (int i = 0; i < 128; i++) {
1645            SMALL_VALUES[i] = new Character((char) i);
1646        }
1647    }
1648    /**
1649     * Indicates whether {@code codePoint} is a valid Unicode code point.
1650     *
1651     * @param codePoint
1652     *            the code point to test.
1653     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1654     *         {@code false} otherwise.
1655     * @since 1.5
1656     */
1657    public static boolean isValidCodePoint(int codePoint) {
1658        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1659    }
1660
1661    /**
1662     * Indicates whether {@code codePoint} is within the supplementary code
1663     * point range.
1664     *
1665     * @param codePoint
1666     *            the code point to test.
1667     * @return {@code true} if {@code codePoint} is within the supplementary
1668     *         code point range; {@code false} otherwise.
1669     * @since 1.5
1670     */
1671    public static boolean isSupplementaryCodePoint(int codePoint) {
1672        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1673    }
1674
1675    /**
1676     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1677     * that is used for representing supplementary characters in UTF-16
1678     * encoding.
1679     *
1680     * @param ch
1681     *            the character to test.
1682     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1683     *         {@code false} otherwise.
1684     * @see #isLowSurrogate(char)
1685     * @since 1.5
1686     */
1687    public static boolean isHighSurrogate(char ch) {
1688        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1689    }
1690
1691    /**
1692     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1693     * that is used for representing supplementary characters in UTF-16
1694     * encoding.
1695     *
1696     * @param ch
1697     *            the character to test.
1698     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1699     *         {@code false} otherwise.
1700     * @see #isHighSurrogate(char)
1701     * @since 1.5
1702     */
1703    public static boolean isLowSurrogate(char ch) {
1704        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1705    }
1706
1707    /**
1708     * Returns true if the given character is a high or low surrogate.
1709     * @since 1.7
1710     */
1711    public static boolean isSurrogate(char ch) {
1712        return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
1713    }
1714
1715    /**
1716     * Indicates whether the specified character pair is a valid surrogate pair.
1717     *
1718     * @param high
1719     *            the high surrogate unit to test.
1720     * @param low
1721     *            the low surrogate unit to test.
1722     * @return {@code true} if {@code high} is a high-surrogate code unit and
1723     *         {@code low} is a low-surrogate code unit; {@code false}
1724     *         otherwise.
1725     * @see #isHighSurrogate(char)
1726     * @see #isLowSurrogate(char)
1727     * @since 1.5
1728     */
1729    public static boolean isSurrogatePair(char high, char low) {
1730        return (isHighSurrogate(high) && isLowSurrogate(low));
1731    }
1732
1733    /**
1734     * Calculates the number of {@code char} values required to represent the
1735     * specified Unicode code point. This method checks if the {@code codePoint}
1736     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1737     * returned, otherwise {@code 1}. To test if the code point is valid, use
1738     * the {@link #isValidCodePoint(int)} method.
1739     *
1740     * @param codePoint
1741     *            the code point for which to calculate the number of required
1742     *            chars.
1743     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1744     * @see #isValidCodePoint(int)
1745     * @see #isSupplementaryCodePoint(int)
1746     * @since 1.5
1747     */
1748    public static int charCount(int codePoint) {
1749        return (codePoint >= 0x10000 ? 2 : 1);
1750    }
1751
1752    /**
1753     * Converts a surrogate pair into a Unicode code point. This method assumes
1754     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1755     * surrogates, then the result is indeterminate. The
1756     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1757     * method to validate the pair.
1758     *
1759     * @param high
1760     *            the high surrogate unit.
1761     * @param low
1762     *            the low surrogate unit.
1763     * @return the Unicode code point corresponding to the surrogate unit pair.
1764     * @see #isSurrogatePair(char, char)
1765     * @since 1.5
1766     */
1767    public static int toCodePoint(char high, char low) {
1768        // See RFC 2781, Section 2.2
1769        // http://www.ietf.org/rfc/rfc2781.txt
1770        int h = (high & 0x3FF) << 10;
1771        int l = low & 0x3FF;
1772        return (h | l) + 0x10000;
1773    }
1774
1775    /**
1776     * Returns the code point at {@code index} in the specified sequence of
1777     * character units. If the unit at {@code index} is a high-surrogate unit,
1778     * {@code index + 1} is less than the length of the sequence and the unit at
1779     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1780     * point represented by the pair is returned; otherwise the {@code char}
1781     * value at {@code index} is returned.
1782     *
1783     * @param seq
1784     *            the source sequence of {@code char} units.
1785     * @param index
1786     *            the position in {@code seq} from which to retrieve the code
1787     *            point.
1788     * @return the Unicode code point or {@code char} value at {@code index} in
1789     *         {@code seq}.
1790     * @throws NullPointerException
1791     *             if {@code seq} is {@code null}.
1792     * @throws IndexOutOfBoundsException
1793     *             if the {@code index} is negative or greater than or equal to
1794     *             the length of {@code seq}.
1795     * @since 1.5
1796     */
1797    public static int codePointAt(CharSequence seq, int index) {
1798        if (seq == null) {
1799            throw new NullPointerException("seq == null");
1800        }
1801        int len = seq.length();
1802        if (index < 0 || index >= len) {
1803            throw new IndexOutOfBoundsException();
1804        }
1805
1806        char high = seq.charAt(index++);
1807        if (index >= len) {
1808            return high;
1809        }
1810        char low = seq.charAt(index);
1811        if (isSurrogatePair(high, low)) {
1812            return toCodePoint(high, low);
1813        }
1814        return high;
1815    }
1816
1817    /**
1818     * Returns the code point at {@code index} in the specified array of
1819     * character units. If the unit at {@code index} is a high-surrogate unit,
1820     * {@code index + 1} is less than the length of the array and the unit at
1821     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1822     * point represented by the pair is returned; otherwise the {@code char}
1823     * value at {@code index} is returned.
1824     *
1825     * @param seq
1826     *            the source array of {@code char} units.
1827     * @param index
1828     *            the position in {@code seq} from which to retrieve the code
1829     *            point.
1830     * @return the Unicode code point or {@code char} value at {@code index} in
1831     *         {@code seq}.
1832     * @throws NullPointerException
1833     *             if {@code seq} is {@code null}.
1834     * @throws IndexOutOfBoundsException
1835     *             if the {@code index} is negative or greater than or equal to
1836     *             the length of {@code seq}.
1837     * @since 1.5
1838     */
1839    public static int codePointAt(char[] seq, int index) {
1840        if (seq == null) {
1841            throw new NullPointerException("seq == null");
1842        }
1843        int len = seq.length;
1844        if (index < 0 || index >= len) {
1845            throw new IndexOutOfBoundsException();
1846        }
1847
1848        char high = seq[index++];
1849        if (index >= len) {
1850            return high;
1851        }
1852        char low = seq[index];
1853        if (isSurrogatePair(high, low)) {
1854            return toCodePoint(high, low);
1855        }
1856        return high;
1857    }
1858
1859    /**
1860     * Returns the code point at {@code index} in the specified array of
1861     * character units, where {@code index} has to be less than {@code limit}.
1862     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1863     * is less than {@code limit} and the unit at {@code index + 1} is a
1864     * low-surrogate unit, then the supplementary code point represented by the
1865     * pair is returned; otherwise the {@code char} value at {@code index} is
1866     * returned.
1867     *
1868     * @param seq
1869     *            the source array of {@code char} units.
1870     * @param index
1871     *            the position in {@code seq} from which to get the code point.
1872     * @param limit
1873     *            the index after the last unit in {@code seq} that can be used.
1874     * @return the Unicode code point or {@code char} value at {@code index} in
1875     *         {@code seq}.
1876     * @throws NullPointerException
1877     *             if {@code seq} is {@code null}.
1878     * @throws IndexOutOfBoundsException
1879     *             if {@code index < 0}, {@code index >= limit},
1880     *             {@code limit < 0} or if {@code limit} is greater than the
1881     *             length of {@code seq}.
1882     * @since 1.5
1883     */
1884    public static int codePointAt(char[] seq, int index, int limit) {
1885        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1886            throw new IndexOutOfBoundsException();
1887        }
1888
1889        char high = seq[index++];
1890        if (index >= limit) {
1891            return high;
1892        }
1893        char low = seq[index];
1894        if (isSurrogatePair(high, low)) {
1895            return toCodePoint(high, low);
1896        }
1897        return high;
1898    }
1899
1900    /**
1901     * Returns the code point that precedes {@code index} in the specified
1902     * sequence of character units. If the unit at {@code index - 1} is a
1903     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1904     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1905     * point represented by the pair is returned; otherwise the {@code char}
1906     * value at {@code index - 1} is returned.
1907     *
1908     * @param seq
1909     *            the source sequence of {@code char} units.
1910     * @param index
1911     *            the position in {@code seq} following the code
1912     *            point that should be returned.
1913     * @return the Unicode code point or {@code char} value before {@code index}
1914     *         in {@code seq}.
1915     * @throws NullPointerException
1916     *             if {@code seq} is {@code null}.
1917     * @throws IndexOutOfBoundsException
1918     *             if the {@code index} is less than 1 or greater than the
1919     *             length of {@code seq}.
1920     * @since 1.5
1921     */
1922    public static int codePointBefore(CharSequence seq, int index) {
1923        if (seq == null) {
1924            throw new NullPointerException("seq == null");
1925        }
1926        int len = seq.length();
1927        if (index < 1 || index > len) {
1928            throw new IndexOutOfBoundsException();
1929        }
1930
1931        char low = seq.charAt(--index);
1932        if (--index < 0) {
1933            return low;
1934        }
1935        char high = seq.charAt(index);
1936        if (isSurrogatePair(high, low)) {
1937            return toCodePoint(high, low);
1938        }
1939        return low;
1940    }
1941
1942    /**
1943     * Returns the code point that precedes {@code index} in the specified
1944     * array of character units. If the unit at {@code index - 1} is a
1945     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1946     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1947     * point represented by the pair is returned; otherwise the {@code char}
1948     * value at {@code index - 1} is returned.
1949     *
1950     * @param seq
1951     *            the source array of {@code char} units.
1952     * @param index
1953     *            the position in {@code seq} following the code
1954     *            point that should be returned.
1955     * @return the Unicode code point or {@code char} value before {@code index}
1956     *         in {@code seq}.
1957     * @throws NullPointerException
1958     *             if {@code seq} is {@code null}.
1959     * @throws IndexOutOfBoundsException
1960     *             if the {@code index} is less than 1 or greater than the
1961     *             length of {@code seq}.
1962     * @since 1.5
1963     */
1964    public static int codePointBefore(char[] seq, int index) {
1965        if (seq == null) {
1966            throw new NullPointerException("seq == null");
1967        }
1968        int len = seq.length;
1969        if (index < 1 || index > len) {
1970            throw new IndexOutOfBoundsException();
1971        }
1972
1973        char low = seq[--index];
1974        if (--index < 0) {
1975            return low;
1976        }
1977        char high = seq[index];
1978        if (isSurrogatePair(high, low)) {
1979            return toCodePoint(high, low);
1980        }
1981        return low;
1982    }
1983
1984    /**
1985     * Returns the code point that precedes the {@code index} in the specified
1986     * array of character units and is not less than {@code start}. If the unit
1987     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1988     * less than {@code start} and the unit at {@code index - 2} is a
1989     * high-surrogate unit, then the supplementary code point represented by the
1990     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1991     * is returned.
1992     *
1993     * @param seq
1994     *            the source array of {@code char} units.
1995     * @param index
1996     *            the position in {@code seq} following the code point that
1997     *            should be returned.
1998     * @param start
1999     *            the index of the first element in {@code seq}.
2000     * @return the Unicode code point or {@code char} value before {@code index}
2001     *         in {@code seq}.
2002     * @throws NullPointerException
2003     *             if {@code seq} is {@code null}.
2004     * @throws IndexOutOfBoundsException
2005     *             if the {@code index <= start}, {@code start < 0},
2006     *             {@code index} is greater than the length of {@code seq}, or
2007     *             if {@code start} is equal or greater than the length of
2008     *             {@code seq}.
2009     * @since 1.5
2010     */
2011    public static int codePointBefore(char[] seq, int index, int start) {
2012        if (seq == null) {
2013            throw new NullPointerException("seq == null");
2014        }
2015        int len = seq.length;
2016        if (index <= start || index > len || start < 0 || start >= len) {
2017            throw new IndexOutOfBoundsException();
2018        }
2019
2020        char low = seq[--index];
2021        if (--index < start) {
2022            return low;
2023        }
2024        char high = seq[index];
2025        if (isSurrogatePair(high, low)) {
2026            return toCodePoint(high, low);
2027        }
2028        return low;
2029    }
2030
2031    /**
2032     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2033     * and copies the value(s) into the char array {@code dst}, starting at
2034     * index {@code dstIndex}.
2035     *
2036     * @param codePoint
2037     *            the Unicode code point to encode.
2038     * @param dst
2039     *            the destination array to copy the encoded value into.
2040     * @param dstIndex
2041     *            the index in {@code dst} from where to start copying.
2042     * @return the number of {@code char} value units copied into {@code dst}.
2043     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2044     * @throws NullPointerException
2045     *             if {@code dst} is {@code null}.
2046     * @throws IndexOutOfBoundsException
2047     *             if {@code dstIndex} is negative, greater than or equal to
2048     *             {@code dst.length} or equals {@code dst.length - 1} when
2049     *             {@code codePoint} is a
2050     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
2051     * @since 1.5
2052     */
2053    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2054        checkValidCodePoint(codePoint);
2055        if (dst == null) {
2056            throw new NullPointerException("dst == null");
2057        }
2058        if (dstIndex < 0 || dstIndex >= dst.length) {
2059            throw new IndexOutOfBoundsException();
2060        }
2061
2062        if (isSupplementaryCodePoint(codePoint)) {
2063            if (dstIndex == dst.length - 1) {
2064                throw new IndexOutOfBoundsException();
2065            }
2066            // See RFC 2781, Section 2.1
2067            // http://www.ietf.org/rfc/rfc2781.txt
2068            int cpPrime = codePoint - 0x10000;
2069            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2070            int low = 0xDC00 | (cpPrime & 0x3FF);
2071            dst[dstIndex] = (char) high;
2072            dst[dstIndex + 1] = (char) low;
2073            return 2;
2074        }
2075
2076        dst[dstIndex] = (char) codePoint;
2077        return 1;
2078    }
2079
2080    /**
2081     * Converts the specified Unicode code point into a UTF-16 encoded sequence
2082     * and returns it as a char array.
2083     *
2084     * @param codePoint
2085     *            the Unicode code point to encode.
2086     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
2087     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
2088     *         then the returned array contains two characters, otherwise it
2089     *         contains just one character.
2090     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2091     * @since 1.5
2092     */
2093    public static char[] toChars(int codePoint) {
2094        checkValidCodePoint(codePoint);
2095        if (isSupplementaryCodePoint(codePoint)) {
2096            int cpPrime = codePoint - 0x10000;
2097            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2098            int low = 0xDC00 | (cpPrime & 0x3FF);
2099            return new char[] { (char) high, (char) low };
2100        }
2101        return new char[] { (char) codePoint };
2102    }
2103
2104    /**
2105     * Counts the number of Unicode code points in the subsequence of the
2106     * specified character sequence, as delineated by {@code beginIndex} and
2107     * {@code endIndex}. Any surrogate values with missing pair values will be
2108     * counted as one code point.
2109     *
2110     * @param seq
2111     *            the {@code CharSequence} to look through.
2112     * @param beginIndex
2113     *            the inclusive index to begin counting at.
2114     * @param endIndex
2115     *            the exclusive index to stop counting at.
2116     * @return the number of Unicode code points.
2117     * @throws NullPointerException
2118     *             if {@code seq} is {@code null}.
2119     * @throws IndexOutOfBoundsException
2120     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2121     *             if {@code endIndex} is greater than the length of {@code seq}.
2122     * @since 1.5
2123     */
2124    public static int codePointCount(CharSequence seq, int beginIndex,
2125            int endIndex) {
2126        if (seq == null) {
2127            throw new NullPointerException("seq == null");
2128        }
2129        int len = seq.length();
2130        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2131            throw new IndexOutOfBoundsException();
2132        }
2133
2134        int result = 0;
2135        for (int i = beginIndex; i < endIndex; i++) {
2136            char c = seq.charAt(i);
2137            if (isHighSurrogate(c)) {
2138                if (++i < endIndex) {
2139                    c = seq.charAt(i);
2140                    if (!isLowSurrogate(c)) {
2141                        result++;
2142                    }
2143                }
2144            }
2145            result++;
2146        }
2147        return result;
2148    }
2149
2150    /**
2151     * Counts the number of Unicode code points in the subsequence of the
2152     * specified char array, as delineated by {@code offset} and {@code count}.
2153     * Any surrogate values with missing pair values will be counted as one code
2154     * point.
2155     *
2156     * @param seq
2157     *            the char array to look through
2158     * @param offset
2159     *            the inclusive index to begin counting at.
2160     * @param count
2161     *            the number of {@code char} values to look through in
2162     *            {@code seq}.
2163     * @return the number of Unicode code points.
2164     * @throws NullPointerException
2165     *             if {@code seq} is {@code null}.
2166     * @throws IndexOutOfBoundsException
2167     *             if {@code offset < 0}, {@code count < 0} or if
2168     *             {@code offset + count} is greater than the length of
2169     *             {@code seq}.
2170     * @since 1.5
2171     */
2172    public static int codePointCount(char[] seq, int offset, int count) {
2173        Arrays.checkOffsetAndCount(seq.length, offset, count);
2174        int endIndex = offset + count;
2175        int result = 0;
2176        for (int i = offset; i < endIndex; i++) {
2177            char c = seq[i];
2178            if (isHighSurrogate(c)) {
2179                if (++i < endIndex) {
2180                    c = seq[i];
2181                    if (!isLowSurrogate(c)) {
2182                        result++;
2183                    }
2184                }
2185            }
2186            result++;
2187        }
2188        return result;
2189    }
2190
2191    /**
2192     * Determines the index in the specified character sequence that is offset
2193     * {@code codePointOffset} code points from {@code index}.
2194     *
2195     * @param seq
2196     *            the character sequence to find the index in.
2197     * @param index
2198     *            the start index in {@code seq}.
2199     * @param codePointOffset
2200     *            the number of code points to look backwards or forwards; may
2201     *            be a negative or positive value.
2202     * @return the index in {@code seq} that is {@code codePointOffset} code
2203     *         points away from {@code index}.
2204     * @throws NullPointerException
2205     *             if {@code seq} is {@code null}.
2206     * @throws IndexOutOfBoundsException
2207     *             if {@code index < 0}, {@code index} is greater than the
2208     *             length of {@code seq}, or if there are not enough values in
2209     *             {@code seq} to skip {@code codePointOffset} code points
2210     *             forwards or backwards (if {@code codePointOffset} is
2211     *             negative) from {@code index}.
2212     * @since 1.5
2213     */
2214    public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) {
2215        if (seq == null) {
2216            throw new NullPointerException("seq == null");
2217        }
2218        int len = seq.length();
2219        if (index < 0 || index > len) {
2220            throw new IndexOutOfBoundsException();
2221        }
2222
2223        if (codePointOffset == 0) {
2224            return index;
2225        }
2226
2227        if (codePointOffset > 0) {
2228            int codePoints = codePointOffset;
2229            int i = index;
2230            while (codePoints > 0) {
2231                codePoints--;
2232                if (i >= len) {
2233                    throw new IndexOutOfBoundsException();
2234                }
2235                if (isHighSurrogate(seq.charAt(i))) {
2236                    int next = i + 1;
2237                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2238                        i++;
2239                    }
2240                }
2241                i++;
2242            }
2243            return i;
2244        }
2245
2246        int codePoints = -codePointOffset;
2247        int i = index;
2248        while (codePoints > 0) {
2249            codePoints--;
2250            i--;
2251            if (i < 0) {
2252                throw new IndexOutOfBoundsException();
2253            }
2254            if (isLowSurrogate(seq.charAt(i))) {
2255                int prev = i - 1;
2256                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2257                    i--;
2258                }
2259            }
2260        }
2261        return i;
2262    }
2263
2264    /**
2265     * Determines the index in a subsequence of the specified character array
2266     * that is offset {@code codePointOffset} code points from {@code index}.
2267     * The subsequence is delineated by {@code start} and {@code count}.
2268     *
2269     * @param seq
2270     *            the character array to find the index in.
2271     * @param start
2272     *            the inclusive index that marks the beginning of the
2273     *            subsequence.
2274     * @param count
2275     *            the number of {@code char} values to include within the
2276     *            subsequence.
2277     * @param index
2278     *            the start index in the subsequence of the char array.
2279     * @param codePointOffset
2280     *            the number of code points to look backwards or forwards; may
2281     *            be a negative or positive value.
2282     * @return the index in {@code seq} that is {@code codePointOffset} code
2283     *         points away from {@code index}.
2284     * @throws NullPointerException
2285     *             if {@code seq} is {@code null}.
2286     * @throws IndexOutOfBoundsException
2287     *             if {@code start < 0}, {@code count < 0},
2288     *             {@code index < start}, {@code index > start + count},
2289     *             {@code start + count} is greater than the length of
2290     *             {@code seq}, or if there are not enough values in
2291     *             {@code seq} to skip {@code codePointOffset} code points
2292     *             forward or backward (if {@code codePointOffset} is
2293     *             negative) from {@code index}.
2294     * @since 1.5
2295     */
2296    public static int offsetByCodePoints(char[] seq, int start, int count,
2297            int index, int codePointOffset) {
2298        Arrays.checkOffsetAndCount(seq.length, start, count);
2299        int end = start + count;
2300        if (index < start || index > end) {
2301            throw new IndexOutOfBoundsException();
2302        }
2303
2304        if (codePointOffset == 0) {
2305            return index;
2306        }
2307
2308        if (codePointOffset > 0) {
2309            int codePoints = codePointOffset;
2310            int i = index;
2311            while (codePoints > 0) {
2312                codePoints--;
2313                if (i >= end) {
2314                    throw new IndexOutOfBoundsException();
2315                }
2316                if (isHighSurrogate(seq[i])) {
2317                    int next = i + 1;
2318                    if (next < end && isLowSurrogate(seq[next])) {
2319                        i++;
2320                    }
2321                }
2322                i++;
2323            }
2324            return i;
2325        }
2326
2327        int codePoints = -codePointOffset;
2328        int i = index;
2329        while (codePoints > 0) {
2330            codePoints--;
2331            i--;
2332            if (i < start) {
2333                throw new IndexOutOfBoundsException();
2334            }
2335            if (isLowSurrogate(seq[i])) {
2336                int prev = i - 1;
2337                if (prev >= start && isHighSurrogate(seq[prev])) {
2338                    i--;
2339                }
2340            }
2341        }
2342        return i;
2343    }
2344
2345    /**
2346     * Convenience method to determine the value of the specified character
2347     * {@code c} in the supplied radix. The value of {@code radix} must be
2348     * between MIN_RADIX and MAX_RADIX.
2349     *
2350     * @param c
2351     *            the character to determine the value of.
2352     * @param radix
2353     *            the radix.
2354     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2355     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2356     */
2357    public static int digit(char c, int radix) {
2358        return digit((int) c, radix);
2359    }
2360
2361    /**
2362     * Convenience method to determine the value of the character
2363     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2364     * be between MIN_RADIX and MAX_RADIX.
2365     *
2366     * @param codePoint
2367     *            the character, including supplementary characters.
2368     * @param radix
2369     *            the radix.
2370     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2371     *         {@link #MAX_RADIX} then the value of the character in the radix;
2372     *         -1 otherwise.
2373     */
2374    public static int digit(int codePoint, int radix) {
2375        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2376            return -1;
2377        }
2378        if (codePoint < 128) {
2379            // Optimized for ASCII
2380            int result = -1;
2381            if ('0' <= codePoint && codePoint <= '9') {
2382                result = codePoint - '0';
2383            } else if ('a' <= codePoint && codePoint <= 'z') {
2384                result = 10 + (codePoint - 'a');
2385            } else if ('A' <= codePoint && codePoint <= 'Z') {
2386                result = 10 + (codePoint - 'A');
2387            }
2388            return result < radix ? result : -1;
2389        }
2390        return digitImpl(codePoint, radix);
2391    }
2392
2393    private static native int digitImpl(int codePoint, int radix);
2394
2395    /**
2396     * Compares this object with the specified object and indicates if they are
2397     * equal. In order to be equal, {@code object} must be an instance of
2398     * {@code Character} and have the same char value as this object.
2399     *
2400     * @param object
     *            the object to compare this {@code Character} with.
2402     * @return {@code true} if the specified object is equal to this
2403     *         {@code Character}; {@code false} otherwise.
2404     */
2405    @Override
2406    public boolean equals(Object object) {
2407        return (object instanceof Character) && (((Character) object).value == value);
2408    }
2409
2410    /**
2411     * Returns the character which represents the specified digit in the
2412     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
     * {@code MAX_RADIX} inclusive; {@code digit} must be non-negative and
2414     * smaller than {@code radix}. If any of these conditions does not hold, 0
2415     * is returned.
2416     *
2417     * @param digit
2418     *            the integer value.
2419     * @param radix
2420     *            the radix.
2421     * @return the character which represents the {@code digit} in the
2422     *         {@code radix}.
2423     */
2424    public static char forDigit(int digit, int radix) {
2425        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2426            if (digit >= 0 && digit < radix) {
2427                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2428            }
2429        }
2430        return 0;
2431    }
2432
2433    /**
2434     * Returns a human-readable name for the given code point,
2435     * or null if the code point is unassigned.
2436     *
2437     * <p>As a fallback mechanism this method returns strings consisting of the Unicode
2438     * block name (with underscores replaced by spaces), a single space, and the uppercase
2439     * hex value of the code point, using as few digits as necessary.
2440     *
2441     * <p>Examples:
2442     * <ul>
2443     * <li>{@code Character.getName(0)} returns "NULL".
2444     * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E".
2445     * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX".
2446     * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000".
2447     * </ul>
2448     *
2449     * <p>Note that the exact strings returned will vary from release to release.
2450     *
2451     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point.
2452     * @since 1.7
2453     */
2454    public static String getName(int codePoint) {
2455        checkValidCodePoint(codePoint);
2456        if (getType(codePoint) == Character.UNASSIGNED) {
2457            return null;
2458        }
2459        String result = getNameImpl(codePoint);
2460        if (result == null) {
2461            String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ');
2462            result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0);
2463        }
2464        return result;
2465    }
2466
2467    private static native String getNameImpl(int codePoint);
2468
2469    /**
2470     * Returns the numeric value of the specified Unicode character.
2471     * See {@link #getNumericValue(int)}.
2472     *
2473     * @param c the character
2474     * @return a non-negative numeric integer value if a numeric value for
2475     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2476     *         -2 if the numeric value can not be represented as an integer.
2477     */
2478    public static int getNumericValue(char c) {
2479        return getNumericValue((int) c);
2480    }
2481
2482    /**
2483     * Gets the numeric value of the specified Unicode code point. For example,
2484     * the code point '\u216B' stands for the Roman number XII, which has the
2485     * numeric value 12.
2486     *
2487     * <p>There are two points of divergence between this method and the Unicode
2488     * specification. This method treats the letters a-z (in both upper and lower
2489     * cases, and their full-width variants) as numbers from 10 to 35. The
2490     * Unicode specification also supports the idea of code points with non-integer
2491     * numeric values; this method does not (except to the extent of returning -2
2492     * for such code points).
2493     *
2494     * @param codePoint the code point
2495     * @return a non-negative numeric integer value if a numeric value for
2496     *         {@code codePoint} exists, -1 if there is no numeric value for
2497     *         {@code codePoint}, -2 if the numeric value can not be
2498     *         represented with an integer.
2499     */
2500    public static int getNumericValue(int codePoint) {
2501        // This is both an optimization and papers over differences between Java and ICU.
2502        if (codePoint < 128) {
2503            if (codePoint >= '0' && codePoint <= '9') {
2504                return codePoint - '0';
2505            }
2506            if (codePoint >= 'a' && codePoint <= 'z') {
2507                return codePoint - ('a' - 10);
2508            }
2509            if (codePoint >= 'A' && codePoint <= 'Z') {
2510                return codePoint - ('A' - 10);
2511            }
2512            return -1;
2513        }
2514        // Full-width uppercase A-Z.
2515        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
2516            return codePoint - 0xff17;
2517        }
2518        // Full-width lowercase a-z.
2519        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
2520            return codePoint - 0xff37;
2521        }
2522        return getNumericValueImpl(codePoint);
2523    }
2524
2525    private static native int getNumericValueImpl(int codePoint);
2526
2527    /**
2528     * Gets the general Unicode category of the specified character.
2529     *
2530     * @param c
2531     *            the character to get the category of.
2532     * @return the Unicode category of {@code c}.
2533     */
2534    public static int getType(char c) {
2535        return getType((int) c);
2536    }
2537
2538    /**
2539     * Gets the general Unicode category of the specified code point.
2540     *
2541     * @param codePoint
2542     *            the Unicode code point to get the category of.
2543     * @return the Unicode category of {@code codePoint}.
2544     */
2545    public static int getType(int codePoint) {
2546        int type = getTypeImpl(codePoint);
2547        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
2548        if (type <= Character.FORMAT) {
2549            return type;
2550        }
2551        return (type + 1);
2552    }
2553
2554    private static native int getTypeImpl(int codePoint);
2555
2556    /**
2557     * Gets the Unicode directionality of the specified character.
2558     *
2559     * @param c
2560     *            the character to get the directionality of.
2561     * @return the Unicode directionality of {@code c}.
2562     */
2563    public static byte getDirectionality(char c) {
2564        return getDirectionality((int)c);
2565    }
2566
2567    /**
2568     * Gets the Unicode directionality of the specified character.
2569     *
2570     * @param codePoint
2571     *            the Unicode code point to get the directionality of.
2572     * @return the Unicode directionality of {@code codePoint}.
2573     */
2574    public static byte getDirectionality(int codePoint) {
2575        if (getType(codePoint) == Character.UNASSIGNED) {
2576            return Character.DIRECTIONALITY_UNDEFINED;
2577        }
2578
2579        byte directionality = getDirectionalityImpl(codePoint);
2580        if (directionality == -1) {
2581            return -1;
2582        }
2583        return DIRECTIONALITY[directionality];
2584    }
2585
2586    private static native byte getDirectionalityImpl(int codePoint);
2587
2588    /**
2589     * Indicates whether the specified character is mirrored.
2590     *
2591     * @param c
2592     *            the character to check.
2593     * @return {@code true} if {@code c} is mirrored; {@code false}
2594     *         otherwise.
2595     */
2596    public static boolean isMirrored(char c) {
2597        return isMirrored((int) c);
2598    }
2599
2600    /**
2601     * Indicates whether the specified code point is mirrored.
2602     *
2603     * @param codePoint
2604     *            the code point to check.
2605     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2606     *         otherwise.
2607     */
2608    public static boolean isMirrored(int codePoint) {
2609        return isMirroredImpl(codePoint);
2610    }
2611
2612    private static native boolean isMirroredImpl(int codePoint);
2613
2614    @Override
2615    public int hashCode() {
2616        return value;
2617    }
2618
2619    /**
2620     * Returns the high surrogate for the given code point. The result is meaningless if
2621     * the given code point is not a supplementary character.
2622     * @since 1.7
2623     */
2624    public static char highSurrogate(int codePoint) {
2625        return (char) ((codePoint >> 10) + 0xd7c0);
2626    }
2627
2628    /**
2629     * Returns the low surrogate for the given code point. The result is meaningless if
2630     * the given code point is not a supplementary character.
2631     * @since 1.7
2632     */
2633    public static char lowSurrogate(int codePoint) {
2634        return (char) ((codePoint & 0x3ff) | 0xdc00);
2635    }
2636
2637    /**
2638     * Returns true if the given code point is in the Basic Multilingual Plane (BMP).
2639     * Such code points can be represented by a single {@code char}.
2640     * @since 1.7
2641     */
2642    public static boolean isBmpCodePoint(int codePoint) {
2643       return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE;
2644    }
2645
2646    /**
2647     * Indicates whether the specified character is defined in the Unicode
2648     * specification.
2649     *
2650     * @param c
2651     *            the character to check.
2652     * @return {@code true} if the general Unicode category of the character is
2653     *         not {@code UNASSIGNED}; {@code false} otherwise.
2654     */
2655    public static boolean isDefined(char c) {
2656        return isDefinedImpl(c);
2657    }
2658
2659    /**
2660     * Indicates whether the specified code point is defined in the Unicode
2661     * specification.
2662     *
2663     * @param codePoint
2664     *            the code point to check.
2665     * @return {@code true} if the general Unicode category of the code point is
2666     *         not {@code UNASSIGNED}; {@code false} otherwise.
2667     */
2668    public static boolean isDefined(int codePoint) {
2669        return isDefinedImpl(codePoint);
2670    }
2671
2672    private static native boolean isDefinedImpl(int codePoint);
2673
2674    /**
2675     * Indicates whether the specified character is a digit.
2676     *
2677     * @param c
2678     *            the character to check.
2679     * @return {@code true} if {@code c} is a digit; {@code false}
2680     *         otherwise.
2681     */
2682    public static boolean isDigit(char c) {
2683        return isDigit((int) c);
2684    }
2685
2686    /**
2687     * Indicates whether the specified code point is a digit.
2688     *
2689     * @param codePoint
2690     *            the code point to check.
2691     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2692     *         otherwise.
2693     */
2694    public static boolean isDigit(int codePoint) {
2695        // Optimized case for ASCII
2696        if ('0' <= codePoint && codePoint <= '9') {
2697            return true;
2698        }
2699        if (codePoint < 1632) {
2700            return false;
2701        }
2702        return isDigitImpl(codePoint);
2703    }
2704
2705    private static native boolean isDigitImpl(int codePoint);
2706
2707    /**
2708     * Indicates whether the specified character is ignorable in a Java or
2709     * Unicode identifier.
2710     *
2711     * @param c
2712     *            the character to check.
2713     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2714     */
2715    public static boolean isIdentifierIgnorable(char c) {
2716        return isIdentifierIgnorable((int) c);
2717    }
2718
2719    /**
2720     * Indicates whether the specified code point is ignorable in a Java or
2721     * Unicode identifier.
2722     *
2723     * @param codePoint
2724     *            the code point to check.
2725     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2726     *         otherwise.
2727     */
2728    public static boolean isIdentifierIgnorable(int codePoint) {
2729        // This is both an optimization and papers over differences between Java and ICU.
2730        if (codePoint < 0x600) {
2731            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2732                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2733        }
2734        return isIdentifierIgnorableImpl(codePoint);
2735    }
2736
2737    private static native boolean isIdentifierIgnorableImpl(int codePoint);
2738
2739    /**
2740     * Indicates whether the specified character is an ISO control character.
2741     *
2742     * @param c
2743     *            the character to check.
2744     * @return {@code true} if {@code c} is an ISO control character;
2745     *         {@code false} otherwise.
2746     */
2747    public static boolean isISOControl(char c) {
2748        return isISOControl((int) c);
2749    }
2750
2751    /**
2752     * Indicates whether the specified code point is an ISO control character.
2753     *
2754     * @param c
2755     *            the code point to check.
2756     * @return {@code true} if {@code c} is an ISO control character;
2757     *         {@code false} otherwise.
2758     */
2759    public static boolean isISOControl(int c) {
2760        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2761    }
2762
2763    /**
2764     * Indicates whether the specified character is a valid part of a Java
2765     * identifier other than the first character.
2766     *
2767     * @param c
2768     *            the character to check.
2769     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2770     *         {@code false} otherwise.
2771     */
2772    public static boolean isJavaIdentifierPart(char c) {
2773        return isJavaIdentifierPart((int) c);
2774    }
2775
2776    /**
2777     * Indicates whether the specified code point is a valid part of a Java
2778     * identifier other than the first character.
2779     *
2780     * @param codePoint
2781     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Java identifier;
2783     *         {@code false} otherwise.
2784     */
2785    public static boolean isJavaIdentifierPart(int codePoint) {
2786        // Use precomputed bitmasks to optimize the ASCII range.
2787        if (codePoint < 64) {
2788            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2789        } else if (codePoint < 128) {
2790            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2791        }
2792        int type = getType(codePoint);
2793        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2794                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2795                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2796                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2797                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2798                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2799    }
2800
2801    /**
2802     * Indicates whether the specified character is a valid first character for
2803     * a Java identifier.
2804     *
2805     * @param c
2806     *            the character to check.
2807     * @return {@code true} if {@code c} is a valid first character of a Java
2808     *         identifier; {@code false} otherwise.
2809     */
2810    public static boolean isJavaIdentifierStart(char c) {
2811        return isJavaIdentifierStart((int) c);
2812    }
2813
2814    /**
2815     * Indicates whether the specified code point is a valid first character for
2816     * a Java identifier.
2817     *
2818     * @param codePoint
2819     *            the code point to check.
2820     * @return {@code true} if {@code codePoint} is a valid start of a Java
2821     *         identifier; {@code false} otherwise.
2822     */
2823    public static boolean isJavaIdentifierStart(int codePoint) {
2824        // Use precomputed bitmasks to optimize the ASCII range.
2825        if (codePoint < 64) {
2826            return (codePoint == '$'); // There's only one character in this range.
2827        } else if (codePoint < 128) {
2828            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2829        }
2830        int type = getType(codePoint);
2831        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2832                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2833    }
2834
2835    /**
2836     * Indicates whether the specified character is a Java letter.
2837     *
2838     * @param c
2839     *            the character to check.
2840     * @return {@code true} if {@code c} is a Java letter; {@code false}
2841     *         otherwise.
2842     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2843     */
2844    @Deprecated
2845    public static boolean isJavaLetter(char c) {
2846        return isJavaIdentifierStart(c);
2847    }
2848
2849    /**
2850     * Indicates whether the specified character is a Java letter or digit
2851     * character.
2852     *
2853     * @param c
2854     *            the character to check.
2855     * @return {@code true} if {@code c} is a Java letter or digit;
2856     *         {@code false} otherwise.
2857     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2858     */
2859    @Deprecated
2860    public static boolean isJavaLetterOrDigit(char c) {
2861        return isJavaIdentifierPart(c);
2862    }
2863
2864    /**
2865     * Indicates whether the specified character is a letter.
2866     *
2867     * @param c
2868     *            the character to check.
2869     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2870     */
2871    public static boolean isLetter(char c) {
2872        return isLetter((int) c);
2873    }
2874
2875    /**
2876     * Indicates whether the specified code point is a letter.
2877     *
2878     * @param codePoint
2879     *            the code point to check.
2880     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2881     *         otherwise.
2882     */
2883    public static boolean isLetter(int codePoint) {
2884        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2885            return true;
2886        }
2887        if (codePoint < 128) {
2888            return false;
2889        }
2890        return isLetterImpl(codePoint);
2891    }
2892
2893    private static native boolean isLetterImpl(int codePoint);
2894
2895    /**
2896     * Indicates whether the specified character is a letter or a digit.
2897     *
2898     * @param c
2899     *            the character to check.
2900     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2901     *         otherwise.
2902     */
2903    public static boolean isLetterOrDigit(char c) {
2904        return isLetterOrDigit((int) c);
2905    }
2906
2907    /**
2908     * Indicates whether the specified code point is a letter or a digit.
2909     *
2910     * @param codePoint
2911     *            the code point to check.
2912     * @return {@code true} if {@code codePoint} is a letter or a digit;
2913     *         {@code false} otherwise.
2914     */
2915    public static boolean isLetterOrDigit(int codePoint) {
2916        // Optimized case for ASCII
2917        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2918            return true;
2919        }
2920        if ('0' <= codePoint && codePoint <= '9') {
2921            return true;
2922        }
2923        if (codePoint < 128) {
2924            return false;
2925        }
2926        return isLetterOrDigitImpl(codePoint);
2927    }
2928
2929    private static native boolean isLetterOrDigitImpl(int codePoint);
2930
2931    /**
2932     * Indicates whether the specified character is a lower case letter.
2933     *
2934     * @param c
2935     *            the character to check.
2936     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2937     *         otherwise.
2938     */
2939    public static boolean isLowerCase(char c) {
2940        return isLowerCase((int) c);
2941    }
2942
2943    /**
2944     * Indicates whether the specified code point is a lower case letter.
2945     *
2946     * @param codePoint
2947     *            the code point to check.
2948     * @return {@code true} if {@code codePoint} is a lower case letter;
2949     *         {@code false} otherwise.
2950     */
2951    public static boolean isLowerCase(int codePoint) {
2952        // Optimized case for ASCII
2953        if ('a' <= codePoint && codePoint <= 'z') {
2954            return true;
2955        }
2956        if (codePoint < 128) {
2957            return false;
2958        }
2959        return isLowerCaseImpl(codePoint);
2960    }
2961
2962    private static native boolean isLowerCaseImpl(int codePoint);
2963
2964    /**
2965     * Indicates whether the specified character is a Java space.
2966     *
2967     * @param c
2968     *            the character to check.
2969     * @return {@code true} if {@code c} is a Java space; {@code false}
2970     *         otherwise.
2971     * @deprecated Use {@link #isWhitespace(char)}
2972     */
2973    @Deprecated
2974    public static boolean isSpace(char c) {
2975        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2976    }
2977
2978    /**
2979     * Indicates whether the specified character is a Unicode space character.
2980     * That is, if it is a member of one of the Unicode categories Space
2981     * Separator, Line Separator, or Paragraph Separator.
2982     *
2983     * @param c
2984     *            the character to check.
2985     * @return {@code true} if {@code c} is a Unicode space character,
2986     *         {@code false} otherwise.
2987     */
2988    public static boolean isSpaceChar(char c) {
2989        return isSpaceChar((int) c);
2990    }
2991
2992    /**
2993     * Indicates whether the specified code point is a Unicode space character.
2994     * That is, if it is a member of one of the Unicode categories Space
2995     * Separator, Line Separator, or Paragraph Separator.
2996     *
2997     * @param codePoint
2998     *            the code point to check.
2999     * @return {@code true} if {@code codePoint} is a Unicode space character,
3000     *         {@code false} otherwise.
3001     */
3002    public static boolean isSpaceChar(int codePoint) {
3003        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
3004            return true;
3005        }
3006        if (codePoint < 0x2000) {
3007            return false;
3008        }
3009        if (codePoint <= 0xffff) {
3010            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3011                    codePoint == 0x202f || codePoint == 0x3000;
3012        }
3013        return isSpaceCharImpl(codePoint);
3014    }
3015
3016    private static native boolean isSpaceCharImpl(int codePoint);
3017
3018    /**
3019     * Indicates whether the specified character is a titlecase character.
3020     *
3021     * @param c
3022     *            the character to check.
3023     * @return {@code true} if {@code c} is a titlecase character, {@code false}
3024     *         otherwise.
3025     */
3026    public static boolean isTitleCase(char c) {
3027        return isTitleCaseImpl(c);
3028    }
3029
3030    /**
3031     * Indicates whether the specified code point is a titlecase character.
3032     *
3033     * @param codePoint
3034     *            the code point to check.
3035     * @return {@code true} if {@code codePoint} is a titlecase character,
3036     *         {@code false} otherwise.
3037     */
3038    public static boolean isTitleCase(int codePoint) {
3039        return isTitleCaseImpl(codePoint);
3040    }
3041
3042    private static native boolean isTitleCaseImpl(int codePoint);
3043
3044    /**
3045     * Indicates whether the specified character is valid as part of a Unicode
3046     * identifier other than the first character.
3047     *
3048     * @param c
3049     *            the character to check.
3050     * @return {@code true} if {@code c} is valid as part of a Unicode
3051     *         identifier; {@code false} otherwise.
3052     */
3053    public static boolean isUnicodeIdentifierPart(char c) {
3054        return isUnicodeIdentifierPartImpl(c);
3055    }
3056
3057    /**
3058     * Indicates whether the specified code point is valid as part of a Unicode
3059     * identifier other than the first character.
3060     *
3061     * @param codePoint
3062     *            the code point to check.
3063     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
3064     *         identifier; {@code false} otherwise.
3065     */
3066    public static boolean isUnicodeIdentifierPart(int codePoint) {
3067        return isUnicodeIdentifierPartImpl(codePoint);
3068    }
3069
3070    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);
3071
3072    /**
3073     * Indicates whether the specified character is a valid initial character
3074     * for a Unicode identifier.
3075     *
3076     * @param c
3077     *            the character to check.
3078     * @return {@code true} if {@code c} is a valid first character for a
3079     *         Unicode identifier; {@code false} otherwise.
3080     */
3081    public static boolean isUnicodeIdentifierStart(char c) {
3082        return isUnicodeIdentifierStartImpl(c);
3083    }
3084
3085    /**
3086     * Indicates whether the specified code point is a valid initial character
3087     * for a Unicode identifier.
3088     *
3089     * @param codePoint
3090     *            the code point to check.
3091     * @return {@code true} if {@code codePoint} is a valid first character for
3092     *         a Unicode identifier; {@code false} otherwise.
3093     */
3094    public static boolean isUnicodeIdentifierStart(int codePoint) {
3095        return isUnicodeIdentifierStartImpl(codePoint);
3096    }
3097
3098    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);
3099
3100    /**
3101     * Indicates whether the specified character is an upper case letter.
3102     *
3103     * @param c
3104     *            the character to check.
     * @return {@code true} if {@code c} is an upper case letter; {@code false}
3106     *         otherwise.
3107     */
3108    public static boolean isUpperCase(char c) {
3109        return isUpperCase((int) c);
3110    }
3111
3112    /**
3113     * Indicates whether the specified code point is an upper case letter.
3114     *
3115     * @param codePoint
3116     *            the code point to check.
     * @return {@code true} if {@code codePoint} is an upper case letter;
3118     *         {@code false} otherwise.
3119     */
3120    public static boolean isUpperCase(int codePoint) {
3121        // Optimized case for ASCII
3122        if ('A' <= codePoint && codePoint <= 'Z') {
3123            return true;
3124        }
3125        if (codePoint < 128) {
3126            return false;
3127        }
3128        return isUpperCaseImpl(codePoint);
3129    }
3130
3131    private static native boolean isUpperCaseImpl(int codePoint);
3132
3133    /**
3134     * Indicates whether the specified character is a whitespace character in
3135     * Java.
3136     *
3137     * @param c
3138     *            the character to check.
3139     * @return {@code true} if the supplied {@code c} is a whitespace character
3140     *         in Java; {@code false} otherwise.
3141     */
3142    public static boolean isWhitespace(char c) {
3143        return isWhitespace((int) c);
3144    }
3145
3146    /**
3147     * Indicates whether the specified code point is a whitespace character in
3148     * Java.
3149     *
3150     * @param codePoint
3151     *            the code point to check.
     * @return {@code true} if the supplied {@code codePoint} is a whitespace character
3153     *         in Java; {@code false} otherwise.
3154     */
3155    public static boolean isWhitespace(int codePoint) {
3156        // This is both an optimization and papers over differences between Java and ICU.
3157        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3158            return true;
3159        }
3160        if (codePoint == 0x1680) {
3161            return true;
3162        }
3163        if (codePoint < 0x2000 || codePoint == 0x2007) {
3164            return false;
3165        }
3166        if (codePoint <= 0xffff) {
3167            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3168                    codePoint == 0x3000;
3169        }
3170        return isWhitespaceImpl(codePoint);
3171    }
3172
3173    private static native boolean isWhitespaceImpl(int codePoint);
3174
3175    /**
3176     * Reverses the order of the first and second byte in the specified
3177     * character.
3178     *
3179     * @param c
3180     *            the character to reverse.
3181     * @return the character with reordered bytes.
3182     */
3183    public static char reverseBytes(char c) {
3184        return (char)((c<<8) | (c>>8));
3185    }
3186
3187    /**
3188     * Returns the lower case equivalent for the specified character if the
3189     * character is an upper case letter. Otherwise, the specified character is
3190     * returned unchanged.
3191     *
3192     * @param c
3193     *            the character
3194     * @return if {@code c} is an upper case character then its lower case
3195     *         counterpart, otherwise just {@code c}.
3196     */
3197    public static char toLowerCase(char c) {
3198        return (char) toLowerCase((int) c);
3199    }
3200
3201    /**
3202     * Returns the lower case equivalent for the specified code point if it is
3203     * an upper case letter. Otherwise, the specified code point is returned
3204     * unchanged.
3205     *
3206     * @param codePoint
3207     *            the code point to check.
3208     * @return if {@code codePoint} is an upper case character then its lower
3209     *         case counterpart, otherwise just {@code codePoint}.
3210     */
3211    public static int toLowerCase(int codePoint) {
3212        // Optimized case for ASCII
3213        if ('A' <= codePoint && codePoint <= 'Z') {
3214            return (char) (codePoint + ('a' - 'A'));
3215        }
3216        if (codePoint < 192) {
3217            return codePoint;
3218        }
3219        return toLowerCaseImpl(codePoint);
3220    }
3221
3222    private static native int toLowerCaseImpl(int codePoint);
3223
3224    @Override
3225    public String toString() {
3226        return String.valueOf(value);
3227    }
3228
3229    /**
3230     * Converts the specified character to its string representation.
3231     *
3232     * @param value
3233     *            the character to convert.
3234     * @return the character converted to a string.
3235     */
3236    public static String toString(char value) {
3237        return String.valueOf(value);
3238    }
3239
3240    /**
3241     * Returns the title case equivalent for the specified character if it
3242     * exists. Otherwise, the specified character is returned unchanged.
3243     *
3244     * @param c
3245     *            the character to convert.
3246     * @return the title case equivalent of {@code c} if it exists, otherwise
3247     *         {@code c}.
3248     */
3249    public static char toTitleCase(char c) {
3250        return (char) toTitleCaseImpl(c);
3251    }
3252
3253    /**
3254     * Returns the title case equivalent for the specified code point if it
3255     * exists. Otherwise, the specified code point is returned unchanged.
3256     *
3257     * @param codePoint
3258     *            the code point to convert.
3259     * @return the title case equivalent of {@code codePoint} if it exists,
3260     *         otherwise {@code codePoint}.
3261     */
3262    public static int toTitleCase(int codePoint) {
3263        return toTitleCaseImpl(codePoint);
3264    }
3265
    /**
     * Native lookup that both {@code toTitleCase} overloads delegate to; the
     * value it returns is passed back to the caller as the title case result.
     */
    private static native int toTitleCaseImpl(int codePoint);
3267
3268    /**
3269     * Returns the upper case equivalent for the specified character if the
3270     * character is a lower case letter. Otherwise, the specified character is
3271     * returned unchanged.
3272     *
3273     * @param c
3274     *            the character to convert.
3275     * @return if {@code c} is a lower case character then its upper case
3276     *         counterpart, otherwise just {@code c}.
3277     */
3278    public static char toUpperCase(char c) {
3279        return (char) toUpperCase((int) c);
3280    }
3281
3282    /**
3283     * Returns the upper case equivalent for the specified code point if the
3284     * code point is a lower case letter. Otherwise, the specified code point is
3285     * returned unchanged.
3286     *
3287     * @param codePoint
3288     *            the code point to convert.
3289     * @return if {@code codePoint} is a lower case character then its upper
3290     *         case counterpart, otherwise just {@code codePoint}.
3291     */
3292    public static int toUpperCase(int codePoint) {
3293        // Optimized case for ASCII
3294        if ('a' <= codePoint && codePoint <= 'z') {
3295            return (char) (codePoint - ('a' - 'A'));
3296        }
3297        if (codePoint < 181) {
3298            return codePoint;
3299        }
3300        return toUpperCaseImpl(codePoint);
3301    }
3302
3303    private static native int toUpperCaseImpl(int codePoint);
3304}
3305