Character.java revision c574d81e3de6cb92cf68bd44f7e50ac52fe2fd87
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21// BEGIN android-removed
22// import java.util.SortedMap;
23// import java.util.TreeMap;
24//
25// import org.apache.harmony.luni.util.BinarySearch;
26// END android-removed
27
28// BEGIN android-changed
29import com.ibm.icu4jni.lang.UCharacter;
30// END android-changed
31
32/**
33 * The wrapper for the primitive type {@code char}. This class also provides a
34 * number of utility methods for working with characters.
35 * <p>
36 * Character data is based upon the Unicode Standard, 4.0. The Unicode
37 * specification, character tables and other information are available at <a
38 * href="http://www.unicode.org/">http://www.unicode.org/</a>.
39 * <p>
40 * Unicode characters are referred to as <i>code points</i>. The range of valid
41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
44 * encoding and {@code char} pairs are used to represent code points in the
45 * supplementary range. A pair of {@code char} values that represent a
46 * supplementary character are made up of a <i>high surrogate</i> with a value
47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
48 * 0xDC00 to 0xDFFF.
49 * <p>
50 * On the Java platform a {@code char} value represents either a single BMP code
51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
52 * is used to represent all Unicode code points.
53 *
54 * @since 1.0
55 */
56public final class Character implements Serializable, Comparable<Character> {
57    private static final long serialVersionUID = 3786198910865385080L;
58
    /**
     * The primitive {@code char} wrapped by this instance. The field is final,
     * so it is assigned exactly once.
     */
59    private final char value;
60
61    /**
62     * The minimum {@code Character} value, U+0000.
63     */
64    public static final char MIN_VALUE = '\u0000';
65
66    /**
67     * The maximum {@code Character} value, U+FFFF.
68     */
69    public static final char MAX_VALUE = '\uffff';
70
71    /**
72     * The minimum radix (2) used for conversions between characters and
     * integers.
73     */
74    public static final int MIN_RADIX = 2;
75
76    /**
77     * The maximum radix (36) used for conversions between characters and
     * integers.
78     */
79    public static final int MAX_RADIX = 36;
80
81    /**
82     * The {@link Class} object that represents the primitive type {@code char}.
     * It is obtained as the component type of {@code char[]}; the cast to
     * {@code Class<Character>} is unchecked, which is why the warning is
     * suppressed on this declaration.
83     */
84    @SuppressWarnings("unchecked")
85    public static final Class<Character> TYPE
86            = (Class<Character>) char[].class.getComponentType();
87
88    // Note: TYPE can't be initialized from "char.class", since *that* is
89    // itself defined to be "java.lang.Character.TYPE".
90
91    /**
92     * Unicode general category constant Cn (unassigned).
93     */
94    public static final byte UNASSIGNED = 0;
95
96    /**
97     * Unicode general category constant Lu (uppercase letter).
98     */
99    public static final byte UPPERCASE_LETTER = 1;
100
101    /**
102     * Unicode general category constant Ll (lowercase letter).
103     */
104    public static final byte LOWERCASE_LETTER = 2;
105
106    /**
107     * Unicode general category constant Lt (titlecase letter).
108     */
109    public static final byte TITLECASE_LETTER = 3;
110
111    /**
112     * Unicode general category constant Lm (modifier letter).
113     */
114    public static final byte MODIFIER_LETTER = 4;
115
116    /**
117     * Unicode general category constant Lo (other letter).
118     */
119    public static final byte OTHER_LETTER = 5;
120
121    /**
122     * Unicode general category constant Mn (non-spacing mark).
123     */
124    public static final byte NON_SPACING_MARK = 6;
125
126    /**
127     * Unicode general category constant Me (enclosing mark).
128     */
129    public static final byte ENCLOSING_MARK = 7;
130
131    /**
132     * Unicode general category constant Mc (combining spacing mark).
133     */
134    public static final byte COMBINING_SPACING_MARK = 8;
135
136    /**
137     * Unicode general category constant Nd (decimal digit number).
138     */
139    public static final byte DECIMAL_DIGIT_NUMBER = 9;
140
141    /**
142     * Unicode general category constant Nl (letter number).
143     */
144    public static final byte LETTER_NUMBER = 10;
145
146    /**
147     * Unicode general category constant No (other number).
148     */
149    public static final byte OTHER_NUMBER = 11;
150
151    /**
152     * Unicode general category constant Zs (space separator).
153     */
154    public static final byte SPACE_SEPARATOR = 12;
155
156    /**
157     * Unicode general category constant Zl (line separator).
158     */
159    public static final byte LINE_SEPARATOR = 13;
160
161    /**
162     * Unicode general category constant Zp (paragraph separator).
163     */
164    public static final byte PARAGRAPH_SEPARATOR = 14;
165
166    /**
167     * Unicode general category constant Cc (control).
168     */
169    public static final byte CONTROL = 15;
170
171    /**
172     * Unicode general category constant Cf (format).
173     */
174    public static final byte FORMAT = 16;
175
176    /**
177     * Unicode general category constant Co (private use).
     * <p>
     * Note that the numbering skips 17: {@link #FORMAT} is 16 and this
     * constant is 18.
178     */
179    public static final byte PRIVATE_USE = 18;
180
181    /**
182     * Unicode general category constant Cs (surrogate).
183     */
184    public static final byte SURROGATE = 19;
185
186    /**
187     * Unicode general category constant Pd (dash punctuation).
188     */
189    public static final byte DASH_PUNCTUATION = 20;
190
191    /**
192     * Unicode general category constant Ps (start punctuation).
193     */
194    public static final byte START_PUNCTUATION = 21;
195
196    /**
197     * Unicode general category constant Pe (end punctuation).
198     */
199    public static final byte END_PUNCTUATION = 22;
200
201    /**
202     * Unicode general category constant Pc (connector punctuation).
203     */
204    public static final byte CONNECTOR_PUNCTUATION = 23;
205
206    /**
207     * Unicode general category constant Po (other punctuation).
208     */
209    public static final byte OTHER_PUNCTUATION = 24;
210
211    /**
212     * Unicode general category constant Sm (math symbol).
213     */
214    public static final byte MATH_SYMBOL = 25;
215
216    /**
217     * Unicode general category constant Sc (currency symbol).
218     */
219    public static final byte CURRENCY_SYMBOL = 26;
220
221    /**
222     * Unicode general category constant Sk (modifier symbol).
223     */
224    public static final byte MODIFIER_SYMBOL = 27;
225
226    /**
227     * Unicode general category constant So (other symbol).
228     */
229    public static final byte OTHER_SYMBOL = 28;
230
231    /**
232     * Unicode general category constant Pi (initial quote punctuation).
233     *
234     * @since 1.4
235     */
236    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
237
238    /**
239     * Unicode general category constant Pf (final quote punctuation).
240     *
241     * @since 1.4
242     */
243    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
244
245    /**
246     * Unicode bidirectional constant for an undefined directionality.
247     *
248     * @since 1.4
249     */
250    public static final byte DIRECTIONALITY_UNDEFINED = -1;
251
252    /**
253     * Unicode bidirectional constant L (left-to-right).
254     *
255     * @since 1.4
256     */
257    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
258
259    /**
260     * Unicode bidirectional constant R (right-to-left).
261     *
262     * @since 1.4
263     */
264    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
265
266    /**
267     * Unicode bidirectional constant AL (right-to-left Arabic).
268     *
269     * @since 1.4
270     */
271    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
272
273    /**
274     * Unicode bidirectional constant EN (European number).
275     *
276     * @since 1.4
277     */
278    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
279
280    /**
281     * Unicode bidirectional constant ES (European number separator).
282     *
283     * @since 1.4
284     */
285    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
286
287    /**
288     * Unicode bidirectional constant ET (European number terminator).
289     *
290     * @since 1.4
291     */
292    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
293
294    /**
295     * Unicode bidirectional constant AN (Arabic number).
296     *
297     * @since 1.4
298     */
299    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
300
301    /**
302     * Unicode bidirectional constant CS (common number separator).
303     *
304     * @since 1.4
305     */
306    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
307
308    /**
309     * Unicode bidirectional constant NSM (non-spacing mark).
310     *
311     * @since 1.4
312     */
313    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
314
315    /**
316     * Unicode bidirectional constant BN (boundary neutral).
317     *
318     * @since 1.4
319     */
320    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
321
322    /**
323     * Unicode bidirectional constant B (paragraph separator).
324     *
325     * @since 1.4
326     */
327    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
328
329    /**
330     * Unicode bidirectional constant S (segment separator).
331     *
332     * @since 1.4
333     */
334    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
335
336    /**
337     * Unicode bidirectional constant WS (whitespace).
338     *
339     * @since 1.4
340     */
341    public static final byte DIRECTIONALITY_WHITESPACE = 12;
342
343    /**
344     * Unicode bidirectional constant ON (other neutrals).
345     *
346     * @since 1.4
347     */
348    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
349
350    /**
351     * Unicode bidirectional constant LRE (left-to-right embedding).
352     *
353     * @since 1.4
354     */
355    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
356
357    /**
358     * Unicode bidirectional constant LRO (left-to-right override).
359     *
360     * @since 1.4
361     */
362    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
363
364    /**
365     * Unicode bidirectional constant RLE (right-to-left embedding).
366     *
367     * @since 1.4
368     */
369    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
370
371    /**
372     * Unicode bidirectional constant RLO (right-to-left override).
373     *
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
377
378    /**
379     * Unicode bidirectional constant PDF (pop directional format).
380     *
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
384
385    /**
386     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
387     * encoding, {@code '\uD800'}.
388     *
389     * @since 1.5
390     */
391    public static final char MIN_HIGH_SURROGATE = '\uD800';
392
393    /**
394     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
395     * encoding, {@code '\uDBFF'}.
396     *
397     * @since 1.5
398     */
399    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
400
401    /**
402     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
403     * encoding, {@code '\uDC00'}.
404     *
405     * @since 1.5
406     */
407    public static final char MIN_LOW_SURROGATE = '\uDC00';
408
409    /**
410     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
411     * encoding, {@code '\uDFFF'}.
412     *
413     * @since 1.5
414     */
415    public static final char MAX_LOW_SURROGATE = '\uDFFF';
416
417    /**
418     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
     * This is the same value as {@link #MIN_HIGH_SURROGATE}.
419     *
420     * @since 1.5
421     */
422    public static final char MIN_SURROGATE = '\uD800';
423
424    /**
425     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
     * This is the same value as {@link #MAX_LOW_SURROGATE}.
426     *
427     * @since 1.5
428     */
429    public static final char MAX_SURROGATE = '\uDFFF';
430
431    /**
432     * The minimum value of a supplementary code point, {@code U+10000}.
433     *
434     * @since 1.5
435     */
436    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
437
438    /**
439     * The minimum code point value, {@code U+0000}.
440     *
441     * @since 1.5
442     */
443    public static final int MIN_CODE_POINT = 0x000000;
444
445    /**
446     * The maximum code point value, {@code U+10FFFF}.
447     *
448     * @since 1.5
449     */
450    public static final int MAX_CODE_POINT = 0x10FFFF;
451
452    /**
453     * The number of bits required to represent a {@code Character} value in
454     * unsigned binary form.
455     *
456     * @since 1.5
457     */
458    public static final int SIZE = 16;
459
460    // BEGIN android-removed
461    // Unicode 3.0.1 (same as Unicode 3.0.0)
462    // private static final String bidiKeys = ...
463
464    // private static final char[] bidiValues = ...
465
466    // private static final char[] mirrored = ...
467
468    // Unicode 3.0.1 (same as Unicode 3.0.0)
469    // private static final String typeKeys = ...
470
471    // private static final char[] typeValues = ...
472
473    // private static final int[] typeValuesCache = ...
474
475    // Unicode 3.0.1 (same as Unicode 3.0.0)
476    // private static final String uppercaseKeys = ...
477
478    // private static final char[] uppercaseValues = ...
479
480    // private static final int[] uppercaseValuesCache = ...
481
482    // private static final String lowercaseKeys = ...
483
484    // private static final char[] lowercaseValues = ...
485
486    // private static final int[] lowercaseValuesCache = ...
487
488    // private static final String digitKeys = ...
489
490    // private static final char[] digitValues = ...
491
492    // private static final char[] typeTags = ...
493    // END android-removed
494
495    // BEGIN android-note
496    // put this in a helper class so that it's only initialized on demand?
497    // END android-note
    // Lookup table holding 19 of the DIRECTIONALITY_* constants (every one
    // except DIRECTIONALITY_UNDEFINED). The entries are deliberately NOT in the
    // numeric order of the Java constants: for example index 2 holds
    // DIRECTIONALITY_EUROPEAN_NUMBER (value 3) and index 13 holds
    // DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (value 2).
    // NOTE(review): presumably the index is the direction code reported by the
    // ICU-backed UCharacter helper imported by this file, and the table
    // translates that numbering into this class's constants -- confirm against
    // the code that reads this array, which is outside this chunk.
498    private static final byte[] DIRECTIONALITY = new byte[] {
499            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
500            DIRECTIONALITY_EUROPEAN_NUMBER,
501            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
502            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
503            DIRECTIONALITY_ARABIC_NUMBER,
504            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
505            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
506            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
507            DIRECTIONALITY_OTHER_NEUTRALS,
508            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
509            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
510            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
511            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
512            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
513            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
514            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
515
516    // BEGIN android-removed
517    // private static final int ISJAVASTART = 1;
518
519    // private static final int ISJAVAPART = 2;
520
521    // Unicode 3.0.1 (same as Unicode 3.0.0)
522    // private static final String titlecaseKeys = ...
523
524    // private static final char[] titlecaseValues = ...
525
526    // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
527    // private static final String numericKeys = ...
528
529    // private static final char[] numericValues = ...
530    // END android-removed
531
532    /*
533     * Represents a subset of the Unicode character set.
534     */
535    public static class Subset {
536        String name;
537
538        /**
539         * Constructs a new {@code Subset}.
540         *
541         * @param string
542         *            this subset's name.
543         */
544        protected Subset(String string) {
545            if (string == null) {
546                throw new NullPointerException();
547            }
548            name = string;
549        }
550
551        /**
552         * Compares this character subset with the specified object. Uses
553         * {@link java.lang.Object#equals(Object)} to do the comparison.
554         *
555         * @param object
556         *            the object to compare this character subset with.
557         * @return {@code true} if {@code object} is this subset, that is, if
558         *         {@code object == this}; {@code false} otherwise.
559         */
560        @Override
561        public final boolean equals(Object object) {
562            return super.equals(object);
563        }
564
565        /**
566         * Returns the integer hash code for this character subset.
567         *
568         * @return this subset's hash code, which is the hash code computed by
569         *         {@link java.lang.Object#hashCode()}.
570         */
571        @Override
572        public final int hashCode() {
573            return super.hashCode();
574        }
575
576        /**
577         * Returns the string representation of this subset.
578         *
579         * @return this subset's name.
580         */
581        @Override
582        public final String toString() {
583            return name;
584        }
585    }
586
587    /**
588     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
589     * specification.
590     *
591     * @since 1.2
592     */
593    public static final class UnicodeBlock extends Subset {
594        /**
595         * The &quot;Surrogates Area&quot; Unicode Block.
596         *
597         * @deprecated As of Java 5, this block has been replaced by
598         *             {@link #HIGH_SURROGATES},
599         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
600         *             {@link #LOW_SURROGATES}.
601         */
602        @Deprecated
603        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
604        /**
605         * The &quot;Basic Latin&quot; Unicode Block.
606         *
607         * @since 1.2
608         */
609        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
610        /**
611         * The &quot;Latin-1 Supplement&quot; Unicode Block.
612         *
613         * @since 1.2
614         */
615        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
616        /**
617         * The &quot;Latin Extended-A&quot; Unicode Block.
618         *
619         * @since 1.2
620         */
621        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
622        /**
623         * The &quot;Latin Extended-B&quot; Unicode Block.
624         *
625         * @since 1.2
626         */
627        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
628        /**
629         * The &quot;IPA Extensions&quot; Unicode Block.
630         *
631         * @since 1.2
632         */
633        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
634        /**
635         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
636         *
637         * @since 1.2
638         */
639        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
640        /**
641         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
642         *
643         * @since 1.2
644         */
645        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
646        /**
647         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
648         * to as &quot;Greek&quot;.
649         *
650         * @since 1.2
651         */
652        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
653        /**
654         * The &quot;Cyrillic&quot; Unicode Block.
655         *
656         * @since 1.2
657         */
658        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
659        /**
660         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
661         * referred to as &quot;Cyrillic Supplementary&quot;.
662         *
663         * @since 1.5
664         */
665        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
666        /**
667         * The &quot;Armenian&quot; Unicode Block.
668         *
669         * @since 1.2
670         */
671        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
672        /**
673         * The &quot;Hebrew&quot; Unicode Block.
674         *
675         * @since 1.2
676         */
677        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
678        /**
679         * The &quot;Arabic&quot; Unicode Block.
680         *
681         * @since 1.2
682         */
683        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
684        /**
685         * The &quot;Syriac&quot; Unicode Block.
686         *
687         * @since 1.4
688         */
689        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
690        /**
691         * The &quot;Thaana&quot; Unicode Block.
692         *
693         * @since 1.4
694         */
695        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
696        /**
697         * The &quot;Devanagari&quot; Unicode Block.
698         *
699         * @since 1.2
700         */
701        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
702        /**
703         * The &quot;Bengali&quot; Unicode Block.
704         *
705         * @since 1.2
706         */
707        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
708        /**
709         * The &quot;Gurmukhi&quot; Unicode Block.
710         *
711         * @since 1.2
712         */
713        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
714        /**
715         * The &quot;Gujarati&quot; Unicode Block.
716         *
717         * @since 1.2
718         */
719        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
720        /**
721         * The &quot;Oriya&quot; Unicode Block.
722         *
723         * @since 1.2
724         */
725        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
726        /**
727         * The &quot;Tamil&quot; Unicode Block.
728         *
729         * @since 1.2
730         */
731        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
732        /**
733         * The &quot;Telugu&quot; Unicode Block.
734         *
735         * @since 1.2
736         */
737        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
738        /**
739         * The &quot;Kannada&quot; Unicode Block.
740         *
741         * @since 1.2
742         */
743        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
744        /**
745         * The &quot;Malayalam&quot; Unicode Block.
746         *
747         * @since 1.2
748         */
749        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
750        /**
751         * The &quot;Sinhala&quot; Unicode Block.
752         *
753         * @since 1.4
754         */
755        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
756        /**
757         * The &quot;Thai&quot; Unicode Block.
758         *
759         * @since 1.2
760         */
761        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
762        /**
763         * The &quot;Lao&quot; Unicode Block.
764         *
765         * @since 1.2
766         */
767        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
768        /**
769         * The &quot;Tibetan&quot; Unicode Block.
770         *
771         * @since 1.2
772         */
773        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
774        /**
775         * The &quot;Myanmar&quot; Unicode Block.
776         *
777         * @since 1.4
778         */
779        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
780        /**
781         * The &quot;Georgian&quot; Unicode Block.
782         *
783         * @since 1.2
784         */
785        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
786        /**
787         * The &quot;Hangul Jamo&quot; Unicode Block.
788         *
789         * @since 1.2
790         */
791        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
792        /**
793         * The &quot;Ethiopic&quot; Unicode Block.
794         *
795         * @since 1.4
796         */
797        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
798        /**
799         * The &quot;Cherokee&quot; Unicode Block.
800         *
801         * @since 1.4
802         */
803        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
804        /**
805         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
806         *
807         * @since 1.4
808         */
809        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
810        /**
811         * The &quot;Ogham&quot; Unicode Block.
812         *
813         * @since 1.4
814         */
815        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
816        /**
817         * The &quot;Runic&quot; Unicode Block.
818         *
819         * @since 1.4
820         */
821        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
822        /**
823         * The &quot;Tagalog&quot; Unicode Block.
824         *
825         * @since 1.5
826         */
827        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
828        /**
829         * The &quot;Hanunoo&quot; Unicode Block.
830         *
831         * @since 1.5
832         */
833        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
834        /**
835         * The &quot;Buhid&quot; Unicode Block.
836         *
837         * @since 1.5
838         */
839        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
840        /**
841         * The &quot;Tagbanwa&quot; Unicode Block.
842         *
843         * @since 1.5
844         */
845        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
846        /**
847         * The &quot;Khmer&quot; Unicode Block.
848         *
849         * @since 1.4
850         */
851        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
852        /**
853         * The &quot;Mongolian&quot; Unicode Block.
854         *
855         * @since 1.4
856         */
857        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
858        /**
859         * The &quot;Limbu&quot; Unicode Block.
860         *
861         * @since 1.5
862         */
863        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
864        /**
865         * The &quot;Tai Le&quot; Unicode Block.
866         *
867         * @since 1.5
868         */
869        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
870        /**
871         * The &quot;Khmer Symbols&quot; Unicode Block.
872         *
873         * @since 1.5
874         */
875        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
876        /**
877         * The &quot;Phonetic Extensions&quot; Unicode Block.
878         *
879         * @since 1.5
880         */
881        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
882        /**
883         * The &quot;Latin Extended Additional&quot; Unicode Block.
884         *
885         * @since 1.2
886         */
887        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
888        /**
889         * The &quot;Greek Extended&quot; Unicode Block.
890         *
891         * @since 1.2
892         */
893        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
894        /**
895         * The &quot;General Punctuation&quot; Unicode Block.
896         *
897         * @since 1.2
898         */
899        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
900        /**
901         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
902         *
903         * @since 1.2
904         */
905        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
906        /**
907         * The &quot;Currency Symbols&quot; Unicode Block.
908         *
909         * @since 1.2
910         */
911        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
912        /**
913         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
914         * Block. Previously referred to as &quot;Combining Marks for
915         * Symbols&quot;.
916         *
917         * @since 1.2
918         */
919        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
920        /**
921         * The &quot;Letterlike Symbols&quot; Unicode Block.
922         *
923         * @since 1.2
924         */
925        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
926        /**
927         * The &quot;Number Forms&quot; Unicode Block.
928         *
929         * @since 1.2
930         */
931        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
932        /**
933         * The &quot;Arrows&quot; Unicode Block.
934         *
935         * @since 1.2
936         */
937        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
938        /**
939         * The &quot;Mathematical Operators&quot; Unicode Block.
940         *
941         * @since 1.2
942         */
943        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
944        /**
945         * The &quot;Miscellaneous Technical&quot; Unicode Block.
946         *
947         * @since 1.2
948         */
949        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
950        /**
951         * The &quot;Control Pictures&quot; Unicode Block.
952         *
953         * @since 1.2
954         */
955        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
956        /**
957         * The &quot;Optical Character Recognition&quot; Unicode Block.
958         *
959         * @since 1.2
960         */
961        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
962        /**
963         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
964         *
965         * @since 1.2
966         */
967        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
968        /**
969         * The &quot;Box Drawing&quot; Unicode Block.
970         *
971         * @since 1.2
972         */
973        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
974        /**
975         * The &quot;Block Elements&quot; Unicode Block.
976         *
977         * @since 1.2
978         */
979        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
980        /**
981         * The &quot;Geometric Shapes&quot; Unicode Block.
982         *
983         * @since 1.2
984         */
985        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
986        /**
987         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
988         *
989         * @since 1.2
990         */
991        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
992        /**
993         * The &quot;Dingbats&quot; Unicode Block.
994         *
995         * @since 1.2
996         */
997        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
998        /**
999         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
1000         *
1001         * @since 1.5
1002         */
1003        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
1004        /**
1005         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
1006         *
1007         * @since 1.5
1008         */
1009        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
1010        /**
1011         * The &quot;Braille Patterns&quot; Unicode Block.
1012         *
1013         * @since 1.4
1014         */
1015        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1016        /**
1017         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1018         *
1019         * @since 1.5
1020         */
1021        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1022        /**
1023         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1024         *
1025         * @since 1.5
1026         */
1027        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1028        /**
1029         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1030         *
1031         * @since 1.5
1032         */
1033        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
1034        /**
1035         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
1036         *
1037         * @since 1.5
1038         */
1039        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1040        /**
1041         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1042         *
1043         * @since 1.4
1044         */
1045        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1046        /**
1047         * The &quot;Kangxi Radicals&quot; Unicode Block.
1048         *
1049         * @since 1.4
1050         */
1051        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1052        /**
1053         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1054         *
1055         * @since 1.4
1056         */
1057        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1058        /**
1059         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1060         *
1061         * @since 1.2
1062         */
1063        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1064        /**
1065         * The &quot;Hiragana&quot; Unicode Block.
1066         *
1067         * @since 1.2
1068         */
1069        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1070        /**
1071         * The &quot;Katakana&quot; Unicode Block.
1072         *
1073         * @since 1.2
1074         */
1075        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1076        /**
1077         * The &quot;Bopomofo&quot; Unicode Block.
1078         *
1079         * @since 1.2
1080         */
1081        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1082        /**
1083         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1084         *
1085         * @since 1.2
1086         */
1087        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1088        /**
1089         * The &quot;Kanbun&quot; Unicode Block.
1090         *
1091         * @since 1.2
1092         */
1093        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1094        /**
1095         * The &quot;Bopomofo Extended&quot; Unicode Block.
1096         *
1097         * @since 1.4
1098         */
1099        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1100        /**
1101         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1102         *
1103         * @since 1.5
1104         */
1105        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1106        /**
1107         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1108         *
1109         * @since 1.2
1110         */
1111        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1112        /**
1113         * The &quot;CJK Compatibility&quot; Unicode Block.
1114         *
1115         * @since 1.2
1116         */
1117        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1118        /**
1119         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1120         *
1121         * @since 1.4
1122         */
1123        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1124        /**
1125         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1126         *
1127         * @since 1.5
1128         */
1129        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1130        /**
1131         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1132         *
1133         * @since 1.2
1134         */
1135        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1136        /**
1137         * The &quot;Yi Syllables&quot; Unicode Block.
1138         *
1139         * @since 1.4
1140         */
1141        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1142        /**
1143         * The &quot;Yi Radicals&quot; Unicode Block.
1144         *
1145         * @since 1.4
1146         */
1147        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1148        /**
1149         * The &quot;Hangul Syllables&quot; Unicode Block.
1150         *
1151         * @since 1.2
1152         */
1153        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
1154        /**
1155         * The &quot;High Surrogates&quot; Unicode Block. This block represents
1156         * code point values in the high surrogate range 0xD800 to 0xDB7F
1157         */
1158        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
1159        /**
1160         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
1161         * represents code point values in the high surrogate range 0xDB80 to
1162         * 0xDBFF
1163         */
1164        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
1165        /**
1166         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
1167         * code point values in the low surrogate range 0xDC00 to 0xDFFF
1168         */
1169        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1170        /**
1171         * The &quot;Private Use Area&quot; Unicode Block.
1172         *
1173         * @since 1.2
1174         */
1175        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1176        /**
1177         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1178         *
1179         * @since 1.2
1180         */
1181        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1182        /**
1183         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1184         *
1185         * @since 1.2
1186         */
1187        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1188        /**
1189         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1190         *
1191         * @since 1.2
1192         */
1193        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1194        /**
1195         * The &quot;Variation Selectors&quot; Unicode Block.
1196         *
1197         * @since 1.5
1198         */
1199        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1200        /**
1201         * The &quot;Combining Half Marks&quot; Unicode Block.
1202         *
1203         * @since 1.2
1204         */
1205        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1206        /**
1207         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1208         *
1209         * @since 1.2
1210         */
1211        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1212        /**
1213         * The &quot;Small Form Variants&quot; Unicode Block.
1214         *
1215         * @since 1.2
1216         */
1217        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1218        /**
1219         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1220         *
1221         * @since 1.2
1222         */
1223        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1224        /**
1225         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1226         *
1227         * @since 1.2
1228         */
1229        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1230        /**
1231         * The &quot;Specials&quot; Unicode Block.
1232         *
1233         * @since 1.2
1234         */
1235        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
1236        /**
1237         * The &quot;Linear B Syllabary&quot; Unicode Block.
1238         *
1239         * @since 1.5
1240         */
1241        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1242        /**
1243         * The &quot;Linear B Ideograms&quot; Unicode Block.
1244         *
1245         * @since 1.5
1246         */
1247        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1248        /**
1249         * The &quot;Aegean Numbers&quot; Unicode Block.
1250         *
1251         * @since 1.5
1252         */
1253        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1254        /**
1255         * The &quot;Old Italic&quot; Unicode Block.
1256         *
1257         * @since 1.5
1258         */
1259        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1260        /**
1261         * The &quot;Gothic&quot; Unicode Block.
1262         *
1263         * @since 1.5
1264         */
1265        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1266        /**
1267         * The &quot;Ugaritic&quot; Unicode Block.
1268         *
1269         * @since 1.5
1270         */
1271        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1272        /**
1273         * The &quot;Deseret&quot; Unicode Block.
1274         *
1275         * @since 1.5
1276         */
1277        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1278        /**
1279         * The &quot;Shavian&quot; Unicode Block.
1280         *
1281         * @since 1.5
1282         */
1283        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1284        /**
1285         * The &quot;Osmanya&quot; Unicode Block.
1286         *
1287         * @since 1.5
1288         */
1289        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1290        /**
1291         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1292         *
1293         * @since 1.5
1294         */
1295        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1296        /**
1297         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1298         *
1299         * @since 1.5
1300         */
1301        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1302        /**
1303         * The &quot;Musical Symbols&quot; Unicode Block.
1304         *
1305         * @since 1.5
1306         */
1307        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1308        /**
1309         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1310         *
1311         * @since 1.5
1312         */
1313        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1314        /**
1315         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1316         *
1317         * @since 1.5
1318         */
1319        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1320        /**
1321         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1322         *
1323         * @since 1.5
1324         */
1325        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1326        /**
1327         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1328         *
1329         * @since 1.5
1330         */
1331        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1332        /**
1333         * The &quot;Tags&quot; Unicode Block.
1334         *
1335         * @since 1.5
1336         */
1337        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1338        /**
1339         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1340         *
1341         * @since 1.5
1342         */
1343        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1344        /**
1345         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1346         *
1347         * @since 1.5
1348         */
1349        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1350        /**
1351         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1352         *
1353         * @since 1.5
1354         */
1355        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1356
1357        /*
1358         * All of the UnicodeBlocks with valid ranges in ascending order.
1359         */
1360        private static UnicodeBlock[] BLOCKS;
1361
1362        // BEGIN android-changed
1363        // /*
1364        //  * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents
1365        //  * valid block names and values of the UnicodeBlock constant they map
1366        //  * to.
1367        //  */
1368        // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...;
1369        // END android-changed
1370
1371        /**
1372         * Retrieves the constant that corresponds to the specified block name.
1373         * The block names are defined by the Unicode 4.0.1 specification in the
1374         * {@code Blocks-4.0.1.txt} file.
1375         * <p>
1376         * Block names may be one of the following:
1377         * <ul>
1378         * <li>Canonical block name, as defined by the Unicode specification;
1379         * case-insensitive.</li>
1380         * <li>Canonical block name without any spaces, as defined by the
1381         * Unicode specification; case-insensitive.</li>
1382         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1383         * uppercasing the canonical name and replacing all spaces and hyphens
1384         * with underscores.</li>
1385         * </ul>
1386         *
1387         * @param blockName
1388         *            the name of the block to retrieve.
1389         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1390         * @throws NullPointerException
1391         *             if {@code blockName} is {@code null}.
1392         * @throws IllegalArgumentException
1393         *             if {@code blockName} is not a valid block name.
1394         * @since 1.5
1395         */
1396        public static final UnicodeBlock forName(String blockName) {
1397            // BEGIN android-note
1398            // trying to get closer to the RI which defines this as final.
1399            // END android-note
1400            if (blockName == null) {
1401                throw new NullPointerException();
1402            }
1403            // BEGIN android-changed
1404            if (BLOCKS == null) {
1405                BLOCKS = UCharacter.getBlockTable();
1406            }
1407            int block = UCharacter.forName(blockName);
1408            if (block == -1) {
1409                if(blockName.equals("SURROGATES_AREA")) {
1410                    return SURROGATES_AREA;
1411                } else if(blockName.equalsIgnoreCase("greek")) {
1412                    return GREEK;
1413                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1414                        blockName.equals("Combining Marks for Symbols") ||
1415                        blockName.equals("CombiningMarksforSymbols")) {
1416                    return COMBINING_MARKS_FOR_SYMBOLS;
1417                }
1418                throw new IllegalArgumentException();
1419            }
1420            return BLOCKS[block];
1421            // END android-changed
1422        }
1423
1424        /**
1425         * Gets the constant for the Unicode block that contains the specified
1426         * character.
1427         *
1428         * @param c
1429         *            the character for which to get the {@code UnicodeBlock}
1430         *            constant.
1431         * @return the {@code UnicodeBlock} constant for the block that contains
1432         *         {@code c}, or {@code null} if {@code c} does not belong to
1433         *         any defined block.
1434         */
1435        public static UnicodeBlock of(char c) {
1436            return of((int) c);
1437        }
1438
1439        /**
1440         * Gets the constant for the Unicode block that contains the specified
1441         * Unicode code point.
1442         *
1443         * @param codePoint
1444         *            the Unicode code point for which to get the
1445         *            {@code UnicodeBlock} constant.
1446         * @return the {@code UnicodeBlock} constant for the block that contains
1447         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1448         *         not belong to any defined block.
1449         * @throws IllegalArgumentException
1450         *             if {@code codePoint} is not a valid Unicode code point.
1451         * @since 1.5
1452         */
1453        public static UnicodeBlock of(int codePoint) {
1454            if (!isValidCodePoint(codePoint)) {
1455                throw new IllegalArgumentException();
1456            }
1457            // BEGIN android-changed
1458            if (BLOCKS == null) {
1459                BLOCKS = UCharacter.getBlockTable();
1460            }
1461            int block = UCharacter.of(codePoint);
1462            if(block == -1 || block >= BLOCKS.length) {
1463                return null;
1464            }
1465            return BLOCKS[block];
1466            // END android-changed
1467        }
1468
1469        // BEGIN android-changed
/**
 * Creates a block constant with the given name. Only {@code blockName}
 * is used: it is handed to the superclass constructor. {@code start}
 * and {@code end} are accepted but not stored or read here.
 *
 * @param blockName
 *            the identifier of the block.
 * @param start
 *            the first code point of the block (unused).
 * @param end
 *            the last code point of the block (unused).
 */
private UnicodeBlock(String blockName, int start, int end) {
    super(blockName);
}
1473        // END android-changed
1474    }
1475
/**
 * Creates a {@code Character} that wraps the given primitive
 * {@code char}.
 *
 * @param value
 *            the primitive char value this instance will hold.
 */
public Character(char value) {
    this.value = value;
}
1486
1487    /**
1488     * Gets the primitive value of this character.
1489     *
1490     * @return this object's primitive value.
1491     */
1492    public char charValue() {
1493        return value;
1494    }
1495
1496    /**
1497     * Compares this object to the specified character object to determine their
1498     * relative order.
1499     *
1500     * @param c
1501     *            the character object to compare this object to.
1502     * @return {@code 0} if the value of this character and the value of
1503     *         {@code c} are equal; a positive value if the value of this
1504     *         character is greater than the value of {@code c}; a negative
1505     *         value if the value of this character is less than the value of
1506     *         {@code c}.
1507     * @see java.lang.Comparable
1508     * @since 1.2
1509     */
1510    public int compareTo(Character c) {
1511        return value - c.value;
1512    }
1513
/**
 * Returns a {@code Character} instance holding {@code c}.
 * <p>
 * Prefer this method to the constructor when a fresh instance is not
 * required: values below 128 are served from a pre-built cache, which
 * may result in better performance. Any other value is wrapped in a
 * newly created instance on every call.
 *
 * @param c
 *            the char value for which to get a {@code Character} instance.
 * @return the {@code Character} instance for {@code c}.
 * @since 1.5
 */
public static Character valueOf(char c) {
    if (c < 128) {
        return SMALL_VALUES[c];
    }
    return new Character(c);
}

/**
 * Pre-built instances for the char values 0 through 127, used by
 * {@link #valueOf(char)} and auto-boxing
 */
private static final Character[] SMALL_VALUES = new Character[128];

static {
    // Fill every slot so that slot n holds the instance wrapping (char) n.
    for (char ch = 0; ch < SMALL_VALUES.length; ch++) {
        SMALL_VALUES[ch] = new Character(ch);
    }
}
1540    /**
1541     * Indicates whether {@code codePoint} is a valid Unicode code point.
1542     *
1543     * @param codePoint
1544     *            the code point to test.
1545     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1546     *         {@code false} otherwise.
1547     * @since 1.5
1548     */
1549    public static boolean isValidCodePoint(int codePoint) {
1550        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1551    }
1552
1553    /**
1554     * Indicates whether {@code codePoint} is within the supplementary code
1555     * point range.
1556     *
1557     * @param codePoint
1558     *            the code point to test.
1559     * @return {@code true} if {@code codePoint} is within the supplementary
1560     *         code point range; {@code false} otherwise.
1561     * @since 1.5
1562     */
1563    public static boolean isSupplementaryCodePoint(int codePoint) {
1564        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1565    }
1566
1567    /**
1568     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1569     * that is used for representing supplementary characters in UTF-16
1570     * encoding.
1571     *
1572     * @param ch
1573     *            the character to test.
1574     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1575     *         {@code false} otherwise.
1576     * @see #isLowSurrogate(char)
1577     * @since 1.5
1578     */
1579    public static boolean isHighSurrogate(char ch) {
1580        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1581    }
1582
1583    /**
1584     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1585     * that is used for representing supplementary characters in UTF-16
1586     * encoding.
1587     *
1588     * @param ch
1589     *            the character to test.
1590     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1591     *         {@code false} otherwise.
1592     * @see #isHighSurrogate(char)
1593     * @since 1.5
1594     */
1595    public static boolean isLowSurrogate(char ch) {
1596        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1597    }
1598
1599    /**
1600     * Indicates whether the specified character pair is a valid surrogate pair.
1601     *
1602     * @param high
1603     *            the high surrogate unit to test.
1604     * @param low
1605     *            the low surrogate unit to test.
1606     * @return {@code true} if {@code high} is a high-surrogate code unit and
1607     *         {@code low} is a low-surrogate code unit; {@code false}
1608     *         otherwise.
1609     * @see #isHighSurrogate(char)
1610     * @see #isLowSurrogate(char)
1611     * @since 1.5
1612     */
1613    public static boolean isSurrogatePair(char high, char low) {
1614        return (isHighSurrogate(high) && isLowSurrogate(low));
1615    }
1616
1617    /**
1618     * Calculates the number of {@code char} values required to represent the
1619     * specified Unicode code point. This method checks if the {@code codePoint}
1620     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1621     * returned, otherwise {@code 1}. To test if the code point is valid, use
1622     * the {@link #isValidCodePoint(int)} method.
1623     *
1624     * @param codePoint
1625     *            the code point for which to calculate the number of required
1626     *            chars.
1627     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1628     * @see #isValidCodePoint(int)
1629     * @see #isSupplementaryCodePoint(int)
1630     * @since 1.5
1631     */
1632    public static int charCount(int codePoint) {
1633        return (codePoint >= 0x10000 ? 2 : 1);
1634    }
1635
1636    /**
1637     * Converts a surrogate pair into a Unicode code point. This method assumes
1638     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1639     * surrogates, then the result is indeterminate. The
1640     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1641     * method to validate the pair.
1642     *
1643     * @param high
1644     *            the high surrogate unit.
1645     * @param low
1646     *            the low surrogate unit.
1647     * @return the Unicode code point corresponding to the surrogate unit pair.
1648     * @see #isSurrogatePair(char, char)
1649     * @since 1.5
1650     */
1651    public static int toCodePoint(char high, char low) {
1652        // See RFC 2781, Section 2.2
1653        // http://www.faqs.org/rfcs/rfc2781.html
1654        int h = (high & 0x3FF) << 10;
1655        int l = low & 0x3FF;
1656        return (h | l) + 0x10000;
1657    }
1658
1659    /**
1660     * Returns the code point at {@code index} in the specified sequence of
1661     * character units. If the unit at {@code index} is a high-surrogate unit,
1662     * {@code index + 1} is less than the length of the sequence and the unit at
1663     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1664     * point represented by the pair is returned; otherwise the {@code char}
1665     * value at {@code index} is returned.
1666     *
1667     * @param seq
1668     *            the source sequence of {@code char} units.
1669     * @param index
1670     *            the position in {@code seq} from which to retrieve the code
1671     *            point.
1672     * @return the Unicode code point or {@code char} value at {@code index} in
1673     *         {@code seq}.
1674     * @throws NullPointerException
1675     *             if {@code seq} is {@code null}.
1676     * @throws IndexOutOfBoundsException
1677     *             if the {@code index} is negative or greater than or equal to
1678     *             the length of {@code seq}.
1679     * @since 1.5
1680     */
1681    public static int codePointAt(CharSequence seq, int index) {
1682        if (seq == null) {
1683            throw new NullPointerException();
1684        }
1685        int len = seq.length();
1686        if (index < 0 || index >= len) {
1687            throw new IndexOutOfBoundsException();
1688        }
1689
1690        char high = seq.charAt(index++);
1691        if (index >= len) {
1692            return high;
1693        }
1694        char low = seq.charAt(index);
1695        if (isSurrogatePair(high, low)) {
1696            return toCodePoint(high, low);
1697        }
1698        return high;
1699    }
1700
1701    /**
1702     * Returns the code point at {@code index} in the specified array of
1703     * character units. If the unit at {@code index} is a high-surrogate unit,
1704     * {@code index + 1} is less than the length of the array and the unit at
1705     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1706     * point represented by the pair is returned; otherwise the {@code char}
1707     * value at {@code index} is returned.
1708     *
1709     * @param seq
1710     *            the source array of {@code char} units.
1711     * @param index
1712     *            the position in {@code seq} from which to retrieve the code
1713     *            point.
1714     * @return the Unicode code point or {@code char} value at {@code index} in
1715     *         {@code seq}.
1716     * @throws NullPointerException
1717     *             if {@code seq} is {@code null}.
1718     * @throws IndexOutOfBoundsException
1719     *             if the {@code index} is negative or greater than or equal to
1720     *             the length of {@code seq}.
1721     * @since 1.5
1722     */
1723    public static int codePointAt(char[] seq, int index) {
1724        if (seq == null) {
1725            throw new NullPointerException();
1726        }
1727        int len = seq.length;
1728        if (index < 0 || index >= len) {
1729            throw new IndexOutOfBoundsException();
1730        }
1731
1732        char high = seq[index++];
1733        if (index >= len) {
1734            return high;
1735        }
1736        char low = seq[index];
1737        if (isSurrogatePair(high, low)) {
1738            return toCodePoint(high, low);
1739        }
1740        return high;
1741    }
1742
1743    /**
1744     * Returns the code point at {@code index} in the specified array of
1745     * character units, where {@code index} has to be less than {@code limit}.
1746     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1747     * is less than {@code limit} and the unit at {@code index + 1} is a
1748     * low-surrogate unit, then the supplementary code point represented by the
1749     * pair is returned; otherwise the {@code char} value at {@code index} is
1750     * returned.
1751     *
1752     * @param seq
1753     *            the source array of {@code char} units.
1754     * @param index
1755     *            the position in {@code seq} from which to get the code point.
1756     * @param limit
1757     *            the index after the last unit in {@code seq} that can be used.
1758     * @return the Unicode code point or {@code char} value at {@code index} in
1759     *         {@code seq}.
1760     * @throws NullPointerException
1761     *             if {@code seq} is {@code null}.
1762     * @throws IndexOutOfBoundsException
1763     *             if {@code index < 0}, {@code index >= limit},
1764     *             {@code limit < 0} or if {@code limit} is greater than the
1765     *             length of {@code seq}.
1766     * @since 1.5
1767     */
1768    public static int codePointAt(char[] seq, int index, int limit) {
1769        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1770            throw new IndexOutOfBoundsException();
1771        }
1772
1773        char high = seq[index++];
1774        if (index >= limit) {
1775            return high;
1776        }
1777        char low = seq[index];
1778        if (isSurrogatePair(high, low)) {
1779            return toCodePoint(high, low);
1780        }
1781        return high;
1782    }
1783
1784    /**
1785     * Returns the code point that precedes {@code index} in the specified
1786     * sequence of character units. If the unit at {@code index - 1} is a
1787     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1788     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1789     * point represented by the pair is returned; otherwise the {@code char}
1790     * value at {@code index - 1} is returned.
1791     *
1792     * @param seq
1793     *            the source sequence of {@code char} units.
1794     * @param index
1795     *            the position in {@code seq} following the code
1796     *            point that should be returned.
1797     * @return the Unicode code point or {@code char} value before {@code index}
1798     *         in {@code seq}.
1799     * @throws NullPointerException
1800     *             if {@code seq} is {@code null}.
1801     * @throws IndexOutOfBoundsException
1802     *             if the {@code index} is less than 1 or greater than the
1803     *             length of {@code seq}.
1804     * @since 1.5
1805     */
1806    public static int codePointBefore(CharSequence seq, int index) {
1807        if (seq == null) {
1808            throw new NullPointerException();
1809        }
1810        int len = seq.length();
1811        if (index < 1 || index > len) {
1812            throw new IndexOutOfBoundsException();
1813        }
1814
1815        char low = seq.charAt(--index);
1816        if (--index < 0) {
1817            return low;
1818        }
1819        char high = seq.charAt(index);
1820        if (isSurrogatePair(high, low)) {
1821            return toCodePoint(high, low);
1822        }
1823        return low;
1824    }
1825
1826    /**
1827     * Returns the code point that precedes {@code index} in the specified
1828     * array of character units. If the unit at {@code index - 1} is a
1829     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1830     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1831     * point represented by the pair is returned; otherwise the {@code char}
1832     * value at {@code index - 1} is returned.
1833     *
1834     * @param seq
1835     *            the source array of {@code char} units.
1836     * @param index
1837     *            the position in {@code seq} following the code
1838     *            point that should be returned.
1839     * @return the Unicode code point or {@code char} value before {@code index}
1840     *         in {@code seq}.
1841     * @throws NullPointerException
1842     *             if {@code seq} is {@code null}.
1843     * @throws IndexOutOfBoundsException
1844     *             if the {@code index} is less than 1 or greater than the
1845     *             length of {@code seq}.
1846     * @since 1.5
1847     */
1848    public static int codePointBefore(char[] seq, int index) {
1849        if (seq == null) {
1850            throw new NullPointerException();
1851        }
1852        int len = seq.length;
1853        if (index < 1 || index > len) {
1854            throw new IndexOutOfBoundsException();
1855        }
1856
1857        char low = seq[--index];
1858        if (--index < 0) {
1859            return low;
1860        }
1861        char high = seq[index];
1862        if (isSurrogatePair(high, low)) {
1863            return toCodePoint(high, low);
1864        }
1865        return low;
1866    }
1867
1868    /**
1869     * Returns the code point that precedes the {@code index} in the specified
1870     * array of character units and is not less than {@code start}. If the unit
1871     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1872     * less than {@code start} and the unit at {@code index - 2} is a
1873     * high-surrogate unit, then the supplementary code point represented by the
1874     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1875     * is returned.
1876     *
1877     * @param seq
1878     *            the source array of {@code char} units.
1879     * @param index
1880     *            the position in {@code seq} following the code point that
1881     *            should be returned.
1882     * @param start
1883     *            the index of the first element in {@code seq}.
1884     * @return the Unicode code point or {@code char} value before {@code index}
1885     *         in {@code seq}.
1886     * @throws NullPointerException
1887     *             if {@code seq} is {@code null}.
1888     * @throws IndexOutOfBoundsException
1889     *             if the {@code index <= start}, {@code start < 0},
1890     *             {@code index} is greater than the length of {@code seq}, or
1891     *             if {@code start} is equal or greater than the length of
1892     *             {@code seq}.
1893     * @since 1.5
1894     */
1895    public static int codePointBefore(char[] seq, int index, int start) {
1896        if (seq == null) {
1897            throw new NullPointerException();
1898        }
1899        int len = seq.length;
1900        if (index <= start || index > len || start < 0 || start >= len) {
1901            throw new IndexOutOfBoundsException();
1902        }
1903
1904        char low = seq[--index];
1905        if (--index < start) {
1906            return low;
1907        }
1908        char high = seq[index];
1909        if (isSurrogatePair(high, low)) {
1910            return toCodePoint(high, low);
1911        }
1912        return low;
1913    }
1914
1915    /**
1916     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1917     * and copies the value(s) into the char array {@code dst}, starting at
1918     * index {@code dstIndex}.
1919     *
1920     * @param codePoint
1921     *            the Unicode code point to encode.
1922     * @param dst
1923     *            the destination array to copy the encoded value into.
1924     * @param dstIndex
1925     *            the index in {@code dst} from where to start copying.
1926     * @return the number of {@code char} value units copied into {@code dst}.
1927     * @throws IllegalArgumentException
1928     *             if {@code codePoint} is not a valid Unicode code point.
1929     * @throws NullPointerException
1930     *             if {@code dst} is {@code null}.
1931     * @throws IndexOutOfBoundsException
1932     *             if {@code dstIndex} is negative, greater than or equal to
1933     *             {@code dst.length} or equals {@code dst.length - 1} when
1934     *             {@code codePoint} is a
1935     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
1936     * @since 1.5
1937     */
1938    public static int toChars(int codePoint, char[] dst, int dstIndex) {
1939        if (!isValidCodePoint(codePoint)) {
1940            throw new IllegalArgumentException();
1941        }
1942        if (dst == null) {
1943            throw new NullPointerException();
1944        }
1945        if (dstIndex < 0 || dstIndex >= dst.length) {
1946            throw new IndexOutOfBoundsException();
1947        }
1948
1949        if (isSupplementaryCodePoint(codePoint)) {
1950            if (dstIndex == dst.length - 1) {
1951                throw new IndexOutOfBoundsException();
1952            }
1953            // See RFC 2781, Section 2.1
1954            // http://www.faqs.org/rfcs/rfc2781.html
1955            int cpPrime = codePoint - 0x10000;
1956            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
1957            int low = 0xDC00 | (cpPrime & 0x3FF);
1958            dst[dstIndex] = (char) high;
1959            dst[dstIndex + 1] = (char) low;
1960            return 2;
1961        }
1962
1963        dst[dstIndex] = (char) codePoint;
1964        return 1;
1965    }
1966
1967    /**
1968     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1969     * and returns it as a char array.
1970     *
1971     * @param codePoint
1972     *            the Unicode code point to encode.
1973     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
1974     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
1975     *         then the returned array contains two characters, otherwise it
1976     *         contains just one character.
1977     * @throws IllegalArgumentException
1978     *             if {@code codePoint} is not a valid Unicode code point.
1979     * @since 1.5
1980     */
1981    public static char[] toChars(int codePoint) {
1982        if (!isValidCodePoint(codePoint)) {
1983            throw new IllegalArgumentException();
1984        }
1985
1986        if (isSupplementaryCodePoint(codePoint)) {
1987            int cpPrime = codePoint - 0x10000;
1988            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
1989            int low = 0xDC00 | (cpPrime & 0x3FF);
1990            return new char[] { (char) high, (char) low };
1991        }
1992        return new char[] { (char) codePoint };
1993    }
1994
1995    /**
1996     * Counts the number of Unicode code points in the subsequence of the
1997     * specified character sequence, as delineated by {@code beginIndex} and
1998     * {@code endIndex}. Any surrogate values with missing pair values will be
1999     * counted as one code point.
2000     *
2001     * @param seq
2002     *            the {@code CharSequence} to look through.
2003     * @param beginIndex
2004     *            the inclusive index to begin counting at.
2005     * @param endIndex
2006     *            the exclusive index to stop counting at.
2007     * @return the number of Unicode code points.
2008     * @throws NullPointerException
2009     *             if {@code seq} is {@code null}.
2010     * @throws IndexOutOfBoundsException
2011     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2012     *             if {@code endIndex} is greater than the length of {@code seq}.
2013     * @since 1.5
2014     */
2015    public static int codePointCount(CharSequence seq, int beginIndex,
2016            int endIndex) {
2017        if (seq == null) {
2018            throw new NullPointerException();
2019        }
2020        int len = seq.length();
2021        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2022            throw new IndexOutOfBoundsException();
2023        }
2024
2025        int result = 0;
2026        for (int i = beginIndex; i < endIndex; i++) {
2027            char c = seq.charAt(i);
2028            if (isHighSurrogate(c)) {
2029                if (++i < endIndex) {
2030                    c = seq.charAt(i);
2031                    if (!isLowSurrogate(c)) {
2032                        result++;
2033                    }
2034                }
2035            }
2036            result++;
2037        }
2038        return result;
2039    }
2040
2041    /**
2042     * Counts the number of Unicode code points in the subsequence of the
2043     * specified char array, as delineated by {@code offset} and {@code count}.
2044     * Any surrogate values with missing pair values will be counted as one code
2045     * point.
2046     *
2047     * @param seq
2048     *            the char array to look through
2049     * @param offset
2050     *            the inclusive index to begin counting at.
2051     * @param count
2052     *            the number of {@code char} values to look through in
2053     *            {@code seq}.
2054     * @return the number of Unicode code points.
2055     * @throws NullPointerException
2056     *             if {@code seq} is {@code null}.
2057     * @throws IndexOutOfBoundsException
2058     *             if {@code offset < 0}, {@code count < 0} or if
2059     *             {@code offset + count} is greater than the length of
2060     *             {@code seq}.
2061     * @since 1.5
2062     */
2063    public static int codePointCount(char[] seq, int offset, int count) {
2064        if (seq == null) {
2065            throw new NullPointerException();
2066        }
2067        int len = seq.length;
2068        int endIndex = offset + count;
2069        if (offset < 0 || count < 0 || endIndex > len) {
2070            throw new IndexOutOfBoundsException();
2071        }
2072
2073        int result = 0;
2074        for (int i = offset; i < endIndex; i++) {
2075            char c = seq[i];
2076            if (isHighSurrogate(c)) {
2077                if (++i < endIndex) {
2078                    c = seq[i];
2079                    if (!isLowSurrogate(c)) {
2080                        result++;
2081                    }
2082                }
2083            }
2084            result++;
2085        }
2086        return result;
2087    }
2088
2089    /**
2090     * Determines the index in the specified character sequence that is offset
2091     * {@code codePointOffset} code points from {@code index}.
2092     *
2093     * @param seq
2094     *            the character sequence to find the index in.
2095     * @param index
2096     *            the start index in {@code seq}.
2097     * @param codePointOffset
2098     *            the number of code points to look backwards or forwards; may
2099     *            be a negative or positive value.
2100     * @return the index in {@code seq} that is {@code codePointOffset} code
2101     *         points away from {@code index}.
2102     * @throws NullPointerException
2103     *             if {@code seq} is {@code null}.
2104     * @throws IndexOutOfBoundsException
2105     *             if {@code index < 0}, {@code index} is greater than the
2106     *             length of {@code seq}, or if there are not enough values in
2107     *             {@code seq} to skip {@code codePointOffset} code points
2108     *             forwards or backwards (if {@code codePointOffset} is
2109     *             negative) from {@code index}.
2110     * @since 1.5
2111     */
2112    public static int offsetByCodePoints(CharSequence seq, int index,
2113            int codePointOffset) {
2114        if (seq == null) {
2115            throw new NullPointerException();
2116        }
2117        int len = seq.length();
2118        if (index < 0 || index > len) {
2119            throw new IndexOutOfBoundsException();
2120        }
2121
2122        if (codePointOffset == 0) {
2123            return index;
2124        }
2125
2126        if (codePointOffset > 0) {
2127            int codePoints = codePointOffset;
2128            int i = index;
2129            while (codePoints > 0) {
2130                codePoints--;
2131                if (i >= len) {
2132                    throw new IndexOutOfBoundsException();
2133                }
2134                if (isHighSurrogate(seq.charAt(i))) {
2135                    int next = i + 1;
2136                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2137                        i++;
2138                    }
2139                }
2140                i++;
2141            }
2142            return i;
2143        }
2144
2145        assert codePointOffset < 0;
2146        int codePoints = -codePointOffset;
2147        int i = index;
2148        while (codePoints > 0) {
2149            codePoints--;
2150            i--;
2151            if (i < 0) {
2152                throw new IndexOutOfBoundsException();
2153            }
2154            if (isLowSurrogate(seq.charAt(i))) {
2155                int prev = i - 1;
2156                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2157                    i--;
2158                }
2159            }
2160        }
2161        return i;
2162    }
2163
2164    /**
2165     * Determines the index in a subsequence of the specified character array
2166     * that is offset {@code codePointOffset} code points from {@code index}.
2167     * The subsequence is delineated by {@code start} and {@code count}.
2168     *
2169     * @param seq
2170     *            the character array to find the index in.
2171     * @param start
2172     *            the inclusive index that marks the beginning of the
2173     *            subsequence.
2174     * @param count
2175     *            the number of {@code char} values to include within the
2176     *            subsequence.
2177     * @param index
2178     *            the start index in the subsequence of the char array.
2179     * @param codePointOffset
2180     *            the number of code points to look backwards or forwards; may
2181     *            be a negative or positive value.
2182     * @return the index in {@code seq} that is {@code codePointOffset} code
2183     *         points away from {@code index}.
2184     * @throws NullPointerException
2185     *             if {@code seq} is {@code null}.
2186     * @throws IndexOutOfBoundsException
2187     *             if {@code start < 0}, {@code count < 0},
2188     *             {@code index < start}, {@code index > start + count},
2189     *             {@code start + count} is greater than the length of
2190     *             {@code seq}, or if there are not enough values in
2191     *             {@code seq} to skip {@code codePointOffset} code points
2192     *             forward or backward (if {@code codePointOffset} is
2193     *             negative) from {@code index}.
2194     * @since 1.5
2195     */
2196    public static int offsetByCodePoints(char[] seq, int start, int count,
2197            int index, int codePointOffset) {
2198        if (seq == null) {
2199            throw new NullPointerException();
2200        }
2201        int end = start + count;
2202        if (start < 0 || count < 0 || end > seq.length || index < start
2203                || index > end) {
2204            throw new IndexOutOfBoundsException();
2205        }
2206
2207        if (codePointOffset == 0) {
2208            return index;
2209        }
2210
2211        if (codePointOffset > 0) {
2212            int codePoints = codePointOffset;
2213            int i = index;
2214            while (codePoints > 0) {
2215                codePoints--;
2216                if (i >= end) {
2217                    throw new IndexOutOfBoundsException();
2218                }
2219                if (isHighSurrogate(seq[i])) {
2220                    int next = i + 1;
2221                    if (next < end && isLowSurrogate(seq[next])) {
2222                        i++;
2223                    }
2224                }
2225                i++;
2226            }
2227            return i;
2228        }
2229
2230        assert codePointOffset < 0;
2231        int codePoints = -codePointOffset;
2232        int i = index;
2233        while (codePoints > 0) {
2234            codePoints--;
2235            i--;
2236            if (i < start) {
2237                throw new IndexOutOfBoundsException();
2238            }
2239            if (isLowSurrogate(seq[i])) {
2240                int prev = i - 1;
2241                if (prev >= start && isHighSurrogate(seq[prev])) {
2242                    i--;
2243                }
2244            }
2245        }
2246        return i;
2247    }
2248
2249    /**
2250     * Convenience method to determine the value of the specified character
2251     * {@code c} in the supplied radix. The value of {@code radix} must be
2252     * between MIN_RADIX and MAX_RADIX.
2253     *
2254     * @param c
2255     *            the character to determine the value of.
2256     * @param radix
2257     *            the radix.
2258     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2259     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2260     */
2261    public static int digit(char c, int radix) {
2262        // BEGIN android-changed
2263        return digit((int) c, radix);
2264        // END android-changed
2265    }
2266
2267    /**
2268     * Convenience method to determine the value of the character
2269     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2270     * be between MIN_RADIX and MAX_RADIX.
2271     *
2272     * @param codePoint
2273     *            the character, including supplementary characters.
2274     * @param radix
2275     *            the radix.
2276     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2277     *         {@link #MAX_RADIX} then the value of the character in the radix;
2278     *         -1 otherwise.
2279     */
2280    public static int digit(int codePoint, int radix) {
2281        // BEGIN android-changed
2282        if (radix < MIN_RADIX || radix > MAX_RADIX) {
2283            return -1;
2284        }
2285        if (codePoint < 128) {
2286            // Optimized for ASCII
2287            int result = -1;
2288            if ('0' <= codePoint && codePoint <= '9') {
2289                result = codePoint - '0';
2290            } else if ('a' <= codePoint && codePoint <= 'z') {
2291                result = 10 + (codePoint - 'a');
2292            } else if ('A' <= codePoint && codePoint <= 'Z') {
2293                result = 10 + (codePoint - 'A');
2294            }
2295            return result < radix ? result : -1;
2296        }
2297        return UCharacter.digit(codePoint, radix);
2298        // END android-changed
2299    }
2300
2301    /**
2302     * Compares this object with the specified object and indicates if they are
2303     * equal. In order to be equal, {@code object} must be an instance of
2304     * {@code Character} and have the same char value as this object.
2305     *
2306     * @param object
     *            the object to compare this {@code Character} with.
2308     * @return {@code true} if the specified object is equal to this
2309     *         {@code Character}; {@code false} otherwise.
2310     */
2311    @Override
2312    public boolean equals(Object object) {
2313        return (object instanceof Character)
2314                && (value == ((Character) object).value);
2315    }
2316
2317    /**
2318     * Returns the character which represents the specified digit in the
2319     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2320     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2321     * smaller than {@code radix}. If any of these conditions does not hold, 0
2322     * is returned.
2323     *
2324     * @param digit
2325     *            the integer value.
2326     * @param radix
2327     *            the radix.
2328     * @return the character which represents the {@code digit} in the
2329     *         {@code radix}.
2330     */
2331    public static char forDigit(int digit, int radix) {
2332        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2333            if (0 <= digit && digit < radix) {
2334                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2335            }
2336        }
2337        return 0;
2338    }
2339
2340    /**
2341     * Gets the numeric value of the specified Unicode character.
2342     *
2343     * @param c
2344     *            the Unicode character to get the numeric value of.
2345     * @return a non-negative numeric integer value if a numeric value for
2346     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2347     *         -2 if the numeric value can not be represented with an integer.
2348     */
2349    public static int getNumericValue(char c) {
2350        // BEGIN android-changed
2351        return getNumericValue((int) c);
2352        // END android-changed
2353    }
2354
2355    /**
2356     * Gets the numeric value of the specified Unicode code point. For example,
2357     * the code point '\u216B' stands for the Roman number XII, which has the
2358     * numeric value 12.
2359     *
2360     * @param codePoint
2361     *            the Unicode code point to get the numeric value of.
2362     * @return a non-negative numeric integer value if a numeric value for
2363     *         {@code codePoint} exists, -1 if there is no numeric value for
2364     *         {@code codePoint}, -2 if the numeric value can not be
2365     *         represented with an integer.
2366     */
2367    public static int getNumericValue(int codePoint) {
2368        // BEGIN android-changed
2369        if (codePoint < 128) {
2370            // Optimized for ASCII
2371            if (codePoint >= '0' && codePoint <= '9') {
2372                return codePoint - '0';
2373            }
2374            if (codePoint >= 'a' && codePoint <= 'z') {
2375                return codePoint - ('a' - 10);
2376            }
2377            if (codePoint >= 'A' && codePoint <= 'Z') {
2378                return codePoint - ('A' - 10);
2379            }
2380            return -1;
2381        }
2382        return UCharacter.getNumericValue(codePoint);
2383        // END android-changed
2384    }
2385
2386    /**
2387     * Gets the general Unicode category of the specified character.
2388     *
2389     * @param c
2390     *            the character to get the category of.
2391     * @return the Unicode category of {@code c}.
2392     */
2393    public static int getType(char c) {
2394        // BEGIN android-changed
2395        return getType((int) c);
2396        // END android-changed
2397    }
2398
2399    /**
2400     * Gets the general Unicode category of the specified code point.
2401     *
2402     * @param codePoint
2403     *            the Unicode code point to get the category of.
2404     * @return the Unicode category of {@code codePoint}.
2405     */
2406    public static int getType(int codePoint) {
2407        // BEGIN android-changed
2408        // if (codePoint < 1000 && codePoint > 0) {
2409        //     return typeValuesCache[codePoint];
2410        // }
2411        // END android-changed
2412        int type = UCharacter.getType(codePoint);
2413
2414        // the type values returned by UCharacter are not compatible with what
2415        // the spec says.RI's Character type values skip the value 17.
2416        if (type <= Character.FORMAT) {
2417            return type;
2418        }
2419        return (type + 1);
2420    }
2421
2422    /**
2423     * Gets the Unicode directionality of the specified character.
2424     *
2425     * @param c
2426     *            the character to get the directionality of.
2427     * @return the Unicode directionality of {@code c}.
2428     */
2429    public static byte getDirectionality(char c) {
2430        // BEGIN android-changed
2431        // int result = BinarySearch.binarySearchRange(bidiKeys, c);
2432        // int high = bidiValues[result * 2];
2433        // if (c <= high) {
2434        //     int code = bidiValues[result * 2 + 1];
2435        //     if (code < 0x100) {
2436        //         return (byte) (code - 1);
2437        //     }
2438        //     return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1);
2439        // }
2440        // return DIRECTIONALITY_UNDEFINED;
2441        return getDirectionality((int)c);
2442        // END android-changed
2443    }
2444
2445    /**
     * Gets the Unicode directionality of the specified code point.
2447     *
2448     * @param codePoint
2449     *            the Unicode code point to get the directionality of.
2450     * @return the Unicode directionality of {@code codePoint}.
2451     */
2452    public static byte getDirectionality(int codePoint) {
2453        if (getType(codePoint) == Character.UNASSIGNED) {
2454            return Character.DIRECTIONALITY_UNDEFINED;
2455        }
2456
2457        byte UCDirectionality = UCharacter.getDirectionality(codePoint);
2458        if (UCDirectionality == -1) {
2459            return -1;
2460        }
2461        return DIRECTIONALITY[UCDirectionality];
2462    }
2463
2464    /**
2465     * Indicates whether the specified character is mirrored.
2466     *
2467     * @param c
2468     *            the character to check.
2469     * @return {@code true} if {@code c} is mirrored; {@code false}
2470     *         otherwise.
2471     */
2472    public static boolean isMirrored(char c) {
2473        // BEGIN android-changed
2474        // int value = c / 16;
2475        // if (value >= mirrored.length) {
2476        //     return false;
2477        // }
2478        // int bit = 1 << (c % 16);
2479        // return (mirrored[value] & bit) != 0;
2480        return isMirrored((int)c);
2481        // ENd android-changed
2482    }
2483
2484    /**
2485     * Indicates whether the specified code point is mirrored.
2486     *
2487     * @param codePoint
2488     *            the code point to check.
2489     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2490     *         otherwise.
2491     */
2492    public static boolean isMirrored(int codePoint) {
2493        return UCharacter.isMirrored(codePoint);
2494    }
2495
2496    @Override
2497    public int hashCode() {
2498        return value;
2499    }
2500
2501    /**
2502     * Indicates whether the specified character is defined in the Unicode
2503     * specification.
2504     *
2505     * @param c
2506     *            the character to check.
2507     * @return {@code true} if the general Unicode category of the character is
2508     *         not {@code UNASSIGNED}; {@code false} otherwise.
2509     */
2510    public static boolean isDefined(char c) {
2511        // BEGIN android-changed
2512        // return getType(c) != UNASSIGNED;
2513        return UCharacter.isDefined(c);
2514        // END android-changed
2515    }
2516
2517    /**
2518     * Indicates whether the specified code point is defined in the Unicode
2519     * specification.
2520     *
2521     * @param codePoint
2522     *            the code point to check.
2523     * @return {@code true} if the general Unicode category of the code point is
2524     *         not {@code UNASSIGNED}; {@code false} otherwise.
2525     */
2526    public static boolean isDefined(int codePoint) {
2527        return UCharacter.isDefined(codePoint);
2528    }
2529
2530    /**
2531     * Indicates whether the specified character is a digit.
2532     *
2533     * @param c
2534     *            the character to check.
2535     * @return {@code true} if {@code c} is a digit; {@code false}
2536     *         otherwise.
2537     */
2538    public static boolean isDigit(char c) {
2539        // BEGIN android-changed
2540        return isDigit((int) c);
2541        // END android-changed
2542    }
2543
2544    /**
2545     * Indicates whether the specified code point is a digit.
2546     *
2547     * @param codePoint
2548     *            the code point to check.
2549     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2550     *         otherwise.
2551     */
2552    public static boolean isDigit(int codePoint) {
2553        // BEGIN android-changed
2554        // Optimized case for ASCII
2555        if ('0' <= codePoint && codePoint <= '9') {
2556            return true;
2557        }
2558        if (codePoint < 1632) {
2559            return false;
2560        }
2561        return UCharacter.isDigit(codePoint);
2562        // END android-changed
2563    }
2564
2565    /**
2566     * Indicates whether the specified character is ignorable in a Java or
2567     * Unicode identifier.
2568     *
2569     * @param c
2570     *            the character to check.
2571     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2572     */
2573    public static boolean isIdentifierIgnorable(char c) {
2574        // BEGIN android-changed
2575        return isIdentifierIgnorable((int) c);
2576        // END android-changed
2577    }
2578
2579    /**
2580     * Indicates whether the specified code point is ignorable in a Java or
2581     * Unicode identifier.
2582     *
2583     * @param codePoint
2584     *            the code point to check.
2585     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2586     *         otherwise.
2587     */
2588    public static boolean isIdentifierIgnorable(int codePoint) {
2589        // BEGIN android-changed
2590        if (codePoint < 0x600) {
2591            return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) ||
2592                    (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad);
2593        }
2594        return UCharacter.isIdentifierIgnorable(codePoint);
2595        // END android-changed
2596    }
2597
2598    /**
2599     * Indicates whether the specified character is an ISO control character.
2600     *
2601     * @param c
2602     *            the character to check.
2603     * @return {@code true} if {@code c} is an ISO control character;
2604     *         {@code false} otherwise.
2605     */
2606    public static boolean isISOControl(char c) {
2607        return isISOControl((int)c);
2608    }
2609
2610    /**
2611     * Indicates whether the specified code point is an ISO control character.
2612     *
2613     * @param c
2614     *            the code point to check.
2615     * @return {@code true} if {@code c} is an ISO control character;
2616     *         {@code false} otherwise.
2617     */
2618    public static boolean isISOControl(int c) {
2619        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2620    }
2621
2622    /**
2623     * Indicates whether the specified character is a valid part of a Java
2624     * identifier other than the first character.
2625     *
2626     * @param c
2627     *            the character to check.
2628     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2629     *         {@code false} otherwise.
2630     */
2631    public static boolean isJavaIdentifierPart(char c) {
2632        // BEGIN android-changed
2633        return isJavaIdentifierPart((int) c);
2634        // END android-changed
2635    }
2636
2637    /**
2638     * Indicates whether the specified code point is a valid part of a Java
2639     * identifier other than the first character.
2640     *
2641     * @param codePoint
2642     *            the code point to check.
2643     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2644     *         {@code false} otherwise.
2645     */
2646    public static boolean isJavaIdentifierPart(int codePoint) {
2647        // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
2648        // Optimized case for ASCII
2649        if (codePoint < 64) {
2650            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
2651        } else if (codePoint < 128) {
2652            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2653        }
2654        int type = getType(codePoint);
2655        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2656                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2657                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2658                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2659                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
2660                || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT;
2661        // END android-changed
2662    }
2663
2664    /**
2665     * Indicates whether the specified character is a valid first character for
2666     * a Java identifier.
2667     *
2668     * @param c
2669     *            the character to check.
2670     * @return {@code true} if {@code c} is a valid first character of a Java
2671     *         identifier; {@code false} otherwise.
2672     */
2673    public static boolean isJavaIdentifierStart(char c) {
2674        // BEGIN android-changed
2675        return isJavaIdentifierStart((int) c);
2676        // END android-changed
2677    }
2678
2679    /**
2680     * Indicates whether the specified code point is a valid first character for
2681     * a Java identifier.
2682     *
2683     * @param codePoint
2684     *            the code point to check.
2685     * @return {@code true} if {@code codePoint} is a valid start of a Java
2686     *         identifier; {@code false} otherwise.
2687     */
2688    public static boolean isJavaIdentifierStart(int codePoint) {
2689        // BEGIN android-changed: use precomputed bitmasks for the ASCII range.
2690        // Optimized case for ASCII
2691        if (codePoint < 64) {
2692            return (codePoint == '$'); // There's only one character in this range.
2693        } else if (codePoint < 128) {
2694            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
2695        }
2696        int type = getType(codePoint);
2697        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL
2698                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2699        // END android-changed
2700    }
2701
2702    /**
2703     * Indicates whether the specified character is a Java letter.
2704     *
2705     * @param c
2706     *            the character to check.
2707     * @return {@code true} if {@code c} is a Java letter; {@code false}
2708     *         otherwise.
2709     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2710     */
2711    @Deprecated
2712    public static boolean isJavaLetter(char c) {
2713        return isJavaIdentifierStart(c);
2714    }
2715
2716    /**
2717     * Indicates whether the specified character is a Java letter or digit
2718     * character.
2719     *
2720     * @param c
2721     *            the character to check.
2722     * @return {@code true} if {@code c} is a Java letter or digit;
2723     *         {@code false} otherwise.
2724     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2725     */
2726    @Deprecated
2727    public static boolean isJavaLetterOrDigit(char c) {
2728        return isJavaIdentifierPart(c);
2729    }
2730
2731    /**
2732     * Indicates whether the specified character is a letter.
2733     *
2734     * @param c
2735     *            the character to check.
2736     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2737     */
2738    public static boolean isLetter(char c) {
2739        // BEGIN android-changed
2740        return isLetter((int) c);
2741        // END android-changed
2742    }
2743
2744    /**
2745     * Indicates whether the specified code point is a letter.
2746     *
2747     * @param codePoint
2748     *            the code point to check.
2749     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2750     *         otherwise.
2751     */
2752    public static boolean isLetter(int codePoint) {
2753        // BEGIN android-changed
2754        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2755            return true;
2756        }
2757        if (codePoint < 128) {
2758            return false;
2759        }
2760        return UCharacter.isLetter(codePoint);
2761        // END android-changed
2762    }
2763
2764    /**
2765     * Indicates whether the specified character is a letter or a digit.
2766     *
2767     * @param c
2768     *            the character to check.
2769     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2770     *         otherwise.
2771     */
2772    public static boolean isLetterOrDigit(char c) {
2773        // BEGIN android-changed
2774        return isLetterOrDigit((int) c);
2775        // END android-changed
2776    }
2777
2778    /**
2779     * Indicates whether the specified code point is a letter or a digit.
2780     *
2781     * @param codePoint
2782     *            the code point to check.
2783     * @return {@code true} if {@code codePoint} is a letter or a digit;
2784     *         {@code false} otherwise.
2785     */
2786    public static boolean isLetterOrDigit(int codePoint) {
2787        // BEGIN android-changed
2788        // Optimized case for ASCII
2789        if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) {
2790            return true;
2791        }
2792        if ('0' <= codePoint && codePoint <= '9') {
2793            return true;
2794        }
2795        if (codePoint < 128) {
2796            return false;
2797        }
2798        return UCharacter.isLetterOrDigit(codePoint);
2799        // END android-changed
2800    }
2801
2802    /**
2803     * Indicates whether the specified character is a lower case letter.
2804     *
2805     * @param c
2806     *            the character to check.
2807     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2808     *         otherwise.
2809     */
2810    public static boolean isLowerCase(char c) {
2811        // BEGIN android-changed
2812        return isLowerCase((int) c);
2813        // END android-changed
2814    }
2815
2816    /**
2817     * Indicates whether the specified code point is a lower case letter.
2818     *
2819     * @param codePoint
2820     *            the code point to check.
2821     * @return {@code true} if {@code codePoint} is a lower case letter;
2822     *         {@code false} otherwise.
2823     */
2824    public static boolean isLowerCase(int codePoint) {
2825        // BEGIN android-changed
2826        // Optimized case for ASCII
2827        if ('a' <= codePoint && codePoint <= 'z') {
2828            return true;
2829        }
2830        if (codePoint < 128) {
2831            return false;
2832        }
2833        // END android-changed
2834        return UCharacter.isLowerCase(codePoint);
2835    }
2836
2837    /**
2838     * Indicates whether the specified character is a Java space.
2839     *
2840     * @param c
2841     *            the character to check.
2842     * @return {@code true} if {@code c} is a Java space; {@code false}
2843     *         otherwise.
2844     * @deprecated Use {@link #isWhitespace(char)}
2845     */
2846    @Deprecated
2847    public static boolean isSpace(char c) {
2848        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2849    }
2850
2851    /**
2852     * Indicates whether the specified character is a Unicode space character.
2853     * That is, if it is a member of one of the Unicode categories Space
2854     * Separator, Line Separator, or Paragraph Separator.
2855     *
2856     * @param c
2857     *            the character to check.
2858     * @return {@code true} if {@code c} is a Unicode space character,
2859     *         {@code false} otherwise.
2860     */
2861    public static boolean isSpaceChar(char c) {
2862        // BEGIN android-changed
2863        return isSpaceChar((int) c);
2864        // END android-changed
2865    }
2866
2867    /**
2868     * Indicates whether the specified code point is a Unicode space character.
2869     * That is, if it is a member of one of the Unicode categories Space
2870     * Separator, Line Separator, or Paragraph Separator.
2871     *
2872     * @param codePoint
2873     *            the code point to check.
2874     * @return {@code true} if {@code codePoint} is a Unicode space character,
2875     *         {@code false} otherwise.
2876     */
2877    public static boolean isSpaceChar(int codePoint) {
2878        // BEGIN android-changed
2879        if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) {
2880            return true;
2881        }
2882        if (codePoint < 0x2000) {
2883            return false;
2884        }
2885        if (codePoint <= 0xffff) {
2886            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
2887                    codePoint == 0x202f || codePoint == 0x3000;
2888        }
2889        return UCharacter.isSpaceChar(codePoint);
2890        // END android-changed
2891    }
2892
2893    /**
2894     * Indicates whether the specified character is a titlecase character.
2895     *
2896     * @param c
2897     *            the character to check.
2898     * @return {@code true} if {@code c} is a titlecase character, {@code false}
2899     *         otherwise.
2900     */
2901    public static boolean isTitleCase(char c) {
2902        // BEGIN android-changed
2903        return UCharacter.isTitleCase(c);
2904        // END android-changed
2905    }
2906
2907    /**
2908     * Indicates whether the specified code point is a titlecase character.
2909     *
2910     * @param codePoint
2911     *            the code point to check.
2912     * @return {@code true} if {@code codePoint} is a titlecase character,
2913     *         {@code false} otherwise.
2914     */
2915    public static boolean isTitleCase(int codePoint) {
2916        return UCharacter.isTitleCase(codePoint);
2917    }
2918
2919    /**
2920     * Indicates whether the specified character is valid as part of a Unicode
2921     * identifier other than the first character.
2922     *
2923     * @param c
2924     *            the character to check.
2925     * @return {@code true} if {@code c} is valid as part of a Unicode
2926     *         identifier; {@code false} otherwise.
2927     */
2928    public static boolean isUnicodeIdentifierPart(char c) {
2929        // BEGIN android-changed
2930        // int type = getType(c);
2931        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2932        //         || type == CONNECTOR_PUNCTUATION
2933        //         || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2934        //         || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
2935        //         || isIdentifierIgnorable(c);
2936        return UCharacter.isUnicodeIdentifierPart(c);
2937        // END android-changed
2938    }
2939
2940    /**
2941     * Indicates whether the specified code point is valid as part of a Unicode
2942     * identifier other than the first character.
2943     *
2944     * @param codePoint
2945     *            the code point to check.
2946     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
2947     *         identifier; {@code false} otherwise.
2948     */
2949    public static boolean isUnicodeIdentifierPart(int codePoint) {
2950        return UCharacter.isUnicodeIdentifierPart(codePoint);
2951    }
2952
2953    /**
2954     * Indicates whether the specified character is a valid initial character
2955     * for a Unicode identifier.
2956     *
2957     * @param c
2958     *            the character to check.
2959     * @return {@code true} if {@code c} is a valid first character for a
2960     *         Unicode identifier; {@code false} otherwise.
2961     */
2962    public static boolean isUnicodeIdentifierStart(char c) {
2963        // BEGIN android-changed
2964        // int type = getType(c);
2965        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2966        //         || type == LETTER_NUMBER;
2967        return UCharacter.isUnicodeIdentifierStart(c);
2968        // END android-changed
2969    }
2970
2971    /**
2972     * Indicates whether the specified code point is a valid initial character
2973     * for a Unicode identifier.
2974     *
2975     * @param codePoint
2976     *            the code point to check.
2977     * @return {@code true} if {@code codePoint} is a valid first character for
2978     *         a Unicode identifier; {@code false} otherwise.
2979     */
2980    public static boolean isUnicodeIdentifierStart(int codePoint) {
2981        return UCharacter.isUnicodeIdentifierStart(codePoint);
2982    }
2983
2984    /**
2985     * Indicates whether the specified character is an upper case letter.
2986     *
2987     * @param c
2988     *            the character to check.
2989     * @return {@code true} if {@code c} is a upper case letter; {@code false}
2990     *         otherwise.
2991     */
2992    public static boolean isUpperCase(char c) {
2993        // BEGIN android-changed
2994        return isUpperCase((int) c);
2995        // END android-changed
2996    }
2997
2998    /**
2999     * Indicates whether the specified code point is an upper case letter.
3000     *
3001     * @param codePoint
3002     *            the code point to check.
3003     * @return {@code true} if {@code codePoint} is a upper case letter;
3004     *         {@code false} otherwise.
3005     */
3006    public static boolean isUpperCase(int codePoint) {
3007        // BEGIN android-changed
3008        // Optimized case for ASCII
3009        if ('A' <= codePoint && codePoint <= 'Z') {
3010            return true;
3011        }
3012        if (codePoint < 128) {
3013            return false;
3014        }
3015        return UCharacter.isUpperCase(codePoint);
3016        // END android-changed
3017    }
3018
3019    /**
3020     * Indicates whether the specified character is a whitespace character in
3021     * Java.
3022     *
3023     * @param c
3024     *            the character to check.
3025     * @return {@code true} if the supplied {@code c} is a whitespace character
3026     *         in Java; {@code false} otherwise.
3027     */
3028    public static boolean isWhitespace(char c) {
3029        // BEGIN android-changed
3030        return isWhitespace((int) c);
3031        // END android-changed
3032    }
3033
3034    /**
3035     * Indicates whether the specified code point is a whitespace character in
3036     * Java.
3037     *
3038     * @param codePoint
3039     *            the code point to check.
3040     * @return {@code true} if the supplied {@code c} is a whitespace character
3041     *         in Java; {@code false} otherwise.
3042     */
3043    public static boolean isWhitespace(int codePoint) {
3044        // BEGIN android-changed
3045        // Optimized case for ASCII
3046        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
3047            return true;
3048        }
3049        if (codePoint == 0x1680) {
3050            return true;
3051        }
3052        if (codePoint < 0x2000 || codePoint == 0x2007) {
3053            return false;
3054        }
3055        if (codePoint <= 0xffff) {
3056            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 ||
3057                    codePoint == 0x3000;
3058        }
3059        return UCharacter.isWhitespace(codePoint);
3060        // END android-changed
3061    }
3062
3063    /**
3064     * Reverses the order of the first and second byte in the specified
3065     * character.
3066     *
3067     * @param c
3068     *            the character to reverse.
3069     * @return the character with reordered bytes.
3070     */
3071    public static char reverseBytes(char c) {
3072        return (char)((c<<8) | (c>>8));
3073    }
3074
3075    /**
3076     * Returns the lower case equivalent for the specified character if the
3077     * character is an upper case letter. Otherwise, the specified character is
3078     * returned unchanged.
3079     *
3080     * @param c
3081     *            the character
3082     * @return if {@code c} is an upper case character then its lower case
3083     *         counterpart, otherwise just {@code c}.
3084     */
3085    public static char toLowerCase(char c) {
3086        // BEGIN android-changed
3087        return (char) toLowerCase((int) c);
3088        // END android-changed
3089    }
3090
3091    /**
3092     * Returns the lower case equivalent for the specified code point if it is
3093     * an upper case letter. Otherwise, the specified code point is returned
3094     * unchanged.
3095     *
3096     * @param codePoint
3097     *            the code point to check.
3098     * @return if {@code codePoint} is an upper case character then its lower
3099     *         case counterpart, otherwise just {@code codePoint}.
3100     */
3101    public static int toLowerCase(int codePoint) {
3102        // BEGIN android-changed
3103        // Optimized case for ASCII
3104        if ('A' <= codePoint && codePoint <= 'Z') {
3105            return (char) (codePoint + ('a' - 'A'));
3106        }
3107        if (codePoint < 192) {
3108            return codePoint;
3109        }
3110        return UCharacter.toLowerCase(codePoint);
3111        // END android-changed
3112    }
3113
3114    @Override
3115    public String toString() {
3116        return String.valueOf(value);
3117    }
3118
3119    /**
3120     * Converts the specified character to its string representation.
3121     *
3122     * @param value
3123     *            the character to convert.
3124     * @return the character converted to a string.
3125     */
3126    public static String toString(char value) {
3127        return String.valueOf(value);
3128    }
3129
3130    /**
3131     * Returns the title case equivalent for the specified character if it
3132     * exists. Otherwise, the specified character is returned unchanged.
3133     *
3134     * @param c
3135     *            the character to convert.
3136     * @return the title case equivalent of {@code c} if it exists, otherwise
3137     *         {@code c}.
3138     */
3139    public static char toTitleCase(char c) {
3140        // BEGIN android-changed
3141        // if (isTitleCase(c)) {
3142        //     return c;
3143        // }
3144        // int result = BinarySearch.binarySearch(titlecaseKeys, c);
3145        // if (result >= 0) {
3146        //     return titlecaseValues[result];
3147        // }
3148        // return toUpperCase(c);
3149        return (char)UCharacter.toTitleCase(c);
3150        // ENd android-changed
3151    }
3152
3153    /**
3154     * Returns the title case equivalent for the specified code point if it
3155     * exists. Otherwise, the specified code point is returned unchanged.
3156     *
3157     * @param codePoint
3158     *            the code point to convert.
3159     * @return the title case equivalent of {@code codePoint} if it exists,
3160     *         otherwise {@code codePoint}.
3161     */
3162    public static int toTitleCase(int codePoint) {
3163        return UCharacter.toTitleCase(codePoint);
3164    }
3165
3166    /**
3167     * Returns the upper case equivalent for the specified character if the
3168     * character is a lower case letter. Otherwise, the specified character is
3169     * returned unchanged.
3170     *
3171     * @param c
3172     *            the character to convert.
3173     * @return if {@code c} is a lower case character then its upper case
3174     *         counterpart, otherwise just {@code c}.
3175     */
3176    public static char toUpperCase(char c) {
3177        // BEGIN android-changed
3178        return (char) toUpperCase((int) c);
3179        // END android-changed
3180    }
3181
3182    /**
3183     * Returns the upper case equivalent for the specified code point if the
3184     * code point is a lower case letter. Otherwise, the specified code point is
3185     * returned unchanged.
3186     *
3187     * @param codePoint
3188     *            the code point to convert.
3189     * @return if {@code codePoint} is a lower case character then its upper
3190     *         case counterpart, otherwise just {@code codePoint}.
3191     */
3192    public static int toUpperCase(int codePoint) {
3193        // BEGIN android-changed
3194        // Optimized case for ASCII
3195        if ('a' <= codePoint && codePoint <= 'z') {
3196            return (char) (codePoint - ('a' - 'A'));
3197        }
3198        if (codePoint < 181) {
3199            return codePoint;
3200        }
3201        return UCharacter.toUpperCase(codePoint);
3202        // END android-changed
3203    }
3204}
3205