Character.java revision 9a501d6cb2a26c3b5d77497826ea33481716ab2d
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21// BEGIN android-removed
22// import java.util.SortedMap;
23// import java.util.TreeMap;
24//
25// import org.apache.harmony.luni.util.BinarySearch;
26// END android-removed
27
28// BEGIN android-changed
29import com.ibm.icu4jni.lang.UCharacter;
30// END android-changed
31
32/**
33 * The wrapper for the primitive type {@code char}. This class also provides a
34 * number of utility methods for working with characters.
35 * <p>
36 * Character data is based upon the Unicode Standard, 4.0. The Unicode
37 * specification, character tables and other information are available at <a
38 * href="http://www.unicode.org/">http://www.unicode.org/</a>.
39 * <p>
40 * Unicode characters are referred to as <i>code points</i>. The range of valid
41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
44 * encoding and {@code char} pairs are used to represent code points in the
45 * supplementary range. A pair of {@code char} values that represents a
46 * supplementary character is made up of a <i>high surrogate</i> with a value
47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
48 * 0xDC00 to 0xDFFF.
49 * <p>
50 * On the Java platform a {@code char} value represents either a single BMP code
51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
52 * is used to represent all Unicode code points.
53 *
54 * @since 1.0
55 */
56public final class Character implements Serializable, Comparable<Character> {
57    private static final long serialVersionUID = 3786198910865385080L;
58
59    private final char value;
60
61    /**
62     * The minimum {@code Character} value.
63     */
64    public static final char MIN_VALUE = '\u0000';
65
66    /**
67     * The maximum {@code Character} value.
68     */
69    public static final char MAX_VALUE = '\uffff';
70
71    /**
72     * The minimum radix used for conversions between characters and integers.
73     */
74    public static final int MIN_RADIX = 2;
75
76    /**
77     * The maximum radix used for conversions between characters and integers.
78     */
79    public static final int MAX_RADIX = 36;
80
81    /**
82     * The {@link Class} object that represents the primitive type {@code char}.
83     */
84    @SuppressWarnings("unchecked")
85    public static final Class<Character> TYPE = (Class<Character>) new char[0]
86            .getClass().getComponentType();
87
88    // Note: This can't be set to "char.class", since *that* is
89    // defined to be "java.lang.Character.TYPE".
90
91    /**
92     * Unicode category constant Cn.
93     */
94    public static final byte UNASSIGNED = 0;
95
96    /**
97     * Unicode category constant Lu.
98     */
99    public static final byte UPPERCASE_LETTER = 1;
100
101    /**
102     * Unicode category constant Ll.
103     */
104    public static final byte LOWERCASE_LETTER = 2;
105
106    /**
107     * Unicode category constant Lt.
108     */
109    public static final byte TITLECASE_LETTER = 3;
110
111    /**
112     * Unicode category constant Lm.
113     */
114    public static final byte MODIFIER_LETTER = 4;
115
116    /**
117     * Unicode category constant Lo.
118     */
119    public static final byte OTHER_LETTER = 5;
120
121    /**
122     * Unicode category constant Mn.
123     */
124    public static final byte NON_SPACING_MARK = 6;
125
126    /**
127     * Unicode category constant Me.
128     */
129    public static final byte ENCLOSING_MARK = 7;
130
131    /**
132     * Unicode category constant Mc.
133     */
134    public static final byte COMBINING_SPACING_MARK = 8;
135
136    /**
137     * Unicode category constant Nd.
138     */
139    public static final byte DECIMAL_DIGIT_NUMBER = 9;
140
141    /**
142     * Unicode category constant Nl.
143     */
144    public static final byte LETTER_NUMBER = 10;
145
146    /**
147     * Unicode category constant No.
148     */
149    public static final byte OTHER_NUMBER = 11;
150
151    /**
152     * Unicode category constant Zs.
153     */
154    public static final byte SPACE_SEPARATOR = 12;
155
156    /**
157     * Unicode category constant Zl.
158     */
159    public static final byte LINE_SEPARATOR = 13;
160
161    /**
162     * Unicode category constant Zp.
163     */
164    public static final byte PARAGRAPH_SEPARATOR = 14;
165
166    /**
167     * Unicode category constant Cc.
168     */
169    public static final byte CONTROL = 15;
170
171    /**
172     * Unicode category constant Cf.
173     */
174    public static final byte FORMAT = 16;
175
176    /**
177     * Unicode category constant Co.
178     */
179    public static final byte PRIVATE_USE = 18;
180
181    /**
182     * Unicode category constant Cs.
183     */
184    public static final byte SURROGATE = 19;
185
186    /**
187     * Unicode category constant Pd.
188     */
189    public static final byte DASH_PUNCTUATION = 20;
190
191    /**
192     * Unicode category constant Ps.
193     */
194    public static final byte START_PUNCTUATION = 21;
195
196    /**
197     * Unicode category constant Pe.
198     */
199    public static final byte END_PUNCTUATION = 22;
200
201    /**
202     * Unicode category constant Pc.
203     */
204    public static final byte CONNECTOR_PUNCTUATION = 23;
205
206    /**
207     * Unicode category constant Po.
208     */
209    public static final byte OTHER_PUNCTUATION = 24;
210
211    /**
212     * Unicode category constant Sm.
213     */
214    public static final byte MATH_SYMBOL = 25;
215
216    /**
217     * Unicode category constant Sc.
218     */
219    public static final byte CURRENCY_SYMBOL = 26;
220
221    /**
222     * Unicode category constant Sk.
223     */
224    public static final byte MODIFIER_SYMBOL = 27;
225
226    /**
227     * Unicode category constant So.
228     */
229    public static final byte OTHER_SYMBOL = 28;
230
231    /**
232     * Unicode category constant Pi.
233     *
234     * @since 1.4
235     */
236    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
237
238    /**
239     * Unicode category constant Pf.
240     *
241     * @since 1.4
242     */
243    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
244
245    /**
246     * Unicode bidirectional constant.
247     *
248     * @since 1.4
249     */
250    public static final byte DIRECTIONALITY_UNDEFINED = -1;
251
252    /**
253     * Unicode bidirectional constant L.
254     *
255     * @since 1.4
256     */
257    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
258
259    /**
260     * Unicode bidirectional constant R.
261     *
262     * @since 1.4
263     */
264    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
265
266    /**
267     * Unicode bidirectional constant AL.
268     *
269     * @since 1.4
270     */
271    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
272
273    /**
274     * Unicode bidirectional constant EN.
275     *
276     * @since 1.4
277     */
278    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
279
280    /**
281     * Unicode bidirectional constant ES.
282     *
283     * @since 1.4
284     */
285    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
286
287    /**
288     * Unicode bidirectional constant ET.
289     *
290     * @since 1.4
291     */
292    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
293
294    /**
295     * Unicode bidirectional constant AN.
296     *
297     * @since 1.4
298     */
299    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
300
301    /**
302     * Unicode bidirectional constant CS.
303     *
304     * @since 1.4
305     */
306    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
307
308    /**
309     * Unicode bidirectional constant NSM.
310     *
311     * @since 1.4
312     */
313    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
314
315    /**
316     * Unicode bidirectional constant BN.
317     *
318     * @since 1.4
319     */
320    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
321
322    /**
323     * Unicode bidirectional constant B.
324     *
325     * @since 1.4
326     */
327    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
328
329    /**
330     * Unicode bidirectional constant S.
331     *
332     * @since 1.4
333     */
334    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
335
336    /**
337     * Unicode bidirectional constant WS.
338     *
339     * @since 1.4
340     */
341    public static final byte DIRECTIONALITY_WHITESPACE = 12;
342
343    /**
344     * Unicode bidirectional constant ON.
345     *
346     * @since 1.4
347     */
348    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
349
350    /**
351     * Unicode bidirectional constant LRE.
352     *
353     * @since 1.4
354     */
355    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
356
357    /**
358     * Unicode bidirectional constant LRO.
359     *
360     * @since 1.4
361     */
362    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
363
364    /**
365     * Unicode bidirectional constant RLE.
366     *
367     * @since 1.4
368     */
369    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
370
371    /**
372     * Unicode bidirectional constant RLO.
373     *
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
377
378    /**
379     * Unicode bidirectional constant PDF.
380     *
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
384
385    /**
386     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
387     * encoding, {@code '\uD800'}.
388     *
389     * @since 1.5
390     */
391    public static final char MIN_HIGH_SURROGATE = '\uD800';
392
393    /**
394     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
395     * encoding, {@code '\uDBFF'}.
396     *
397     * @since 1.5
398     */
399    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
400
401    /**
402     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
403     * encoding, {@code '\uDC00'}.
404     *
405     * @since 1.5
406     */
407    public static final char MIN_LOW_SURROGATE = '\uDC00';
408
409    /**
410     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
411     * encoding, {@code '\uDFFF'}.
412     *
413     * @since 1.5
414     */
415    public static final char MAX_LOW_SURROGATE = '\uDFFF';
416
417    /**
418     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
419     *
420     * @since 1.5
421     */
422    public static final char MIN_SURROGATE = '\uD800';
423
424    /**
425     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
426     *
427     * @since 1.5
428     */
429    public static final char MAX_SURROGATE = '\uDFFF';
430
431    /**
432     * The minimum value of a supplementary code point, {@code U+010000}.
433     *
434     * @since 1.5
435     */
436    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
437
438    /**
439     * The minimum code point value, {@code U+0000}.
440     *
441     * @since 1.5
442     */
443    public static final int MIN_CODE_POINT = 0x000000;
444
445    /**
446     * The maximum code point value, {@code U+10FFFF}.
447     *
448     * @since 1.5
449     */
450    public static final int MAX_CODE_POINT = 0x10FFFF;
451
452    /**
453     * The number of bits required to represent a {@code Character} value in
454     * unsigned form.
455     *
456     * @since 1.5
457     */
458    public static final int SIZE = 16;
459
460    // BEGIN android-removed
461    // Unicode 3.0.1 (same as Unicode 3.0.0)
462    // private static final String bidiKeys = ...
463
464    // private static final char[] bidiValues = ...
465
466    // private static final char[] mirrored = ...
467
468    // Unicode 3.0.1 (same as Unicode 3.0.0)
469    // private static final String typeKeys = ...
470
471    // private static final char[] typeValues = ...
472
473    // private static final int[] typeValuesCache = ...
474
475    // Unicode 3.0.1 (same as Unicode 3.0.0)
476    // private static final String uppercaseKeys = ...
477
478    // private static final char[] uppercaseValues = ...
479
480    // private static final int[] uppercaseValuesCache = ...
481
482    // private static final String lowercaseKeys = ...
483
484    // private static final char[] lowercaseValues = ...
485
486    // private static final int[] lowercaseValuesCache = ...
487
488    // private static final String digitKeys = ...
489
490    // private static final char[] digitValues = ...
491    // END android-removed
492
493    // BEGIN android-note
494    // put this in a helper class so that it's only initialized on demand?
495    // END android-note
496    private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002"
497            .getValue();
498
499    // BEGIN android-note
500    // put this in a helper class so that it's only initialized on demand?
501    // END android-note
502    private static final byte[] DIRECTIONALITY = new byte[] {
503            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
504            DIRECTIONALITY_EUROPEAN_NUMBER,
505            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
506            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
507            DIRECTIONALITY_ARABIC_NUMBER,
508            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
509            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
510            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
511            DIRECTIONALITY_OTHER_NEUTRALS,
512            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
513            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
514            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
515            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
516            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
517            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
518            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
519
520    private static final int ISJAVASTART = 1;
521
522    private static final int ISJAVAPART = 2;
523
524    // BEGIN android-removed
525    // Unicode 3.0.1 (same as Unicode 3.0.0)
526    // private static final String titlecaseKeys = ...
527
528    // private static final char[] titlecaseValues = ...
529
530    // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
531    // private static final String numericKeys = ...
532
533    // private static final char[] numericValues = ...
534    // END android-removed
535
536    /*
537     * Represents a subset of the Unicode character set.
538     */
539    public static class Subset {
540        String name;
541
542        /**
543         * Constructs a new {@code Subset}.
544         *
545         * @param string
546         *            this subset's name.
547         */
548        protected Subset(String string) {
549            if (string == null) {
550                throw new NullPointerException();
551            }
552            name = string;
553        }
554
555        /**
556         * Compares this character subset with the specified object. Uses
557         * {@link java.lang.Object#equals(Object)} to do the comparison.
558         *
559         * @param object
560         *            the object to compare this character subset with.
561         * @return {@code true} if {@code object} is this subset, that is, if
562         *         {@code object == this}; {@code false} otherwise.
563         */
564        @Override
565        public final boolean equals(Object object) {
566            return super.equals(object);
567        }
568
569        /**
570         * Returns the integer hash code for this character subset.
571         *
572         * @return this subset's hash code, which is the hash code computed by
573         *         {@link java.lang.Object#hashCode()}.
574         */
575        @Override
576        public final int hashCode() {
577            return super.hashCode();
578        }
579
580        /**
581         * Returns the string representation of this subset.
582         *
583         * @return this subset's name.
584         */
585        @Override
586        public final String toString() {
587            return name;
588        }
589    }
590
591    /**
592     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
593     * specification.
594     *
595     * @since 1.2
596     */
597    public static final class UnicodeBlock extends Subset {
598        /**
599         * The &quot;Surrogates Area&quot; Unicode Block.
600         *
601         * @deprecated As of Java 5, this block has been replaced by
602         *             {@link #HIGH_SURROGATES},
603         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
604         *             {@link #LOW_SURROGATES}.
605         */
606        @Deprecated
607        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
608        /**
609         * The &quot;Basic Latin&quot; Unicode Block.
610         *
611         * @since 1.2
612         */
613        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
614        /**
615         * The &quot;Latin-1 Supplement&quot; Unicode Block.
616         *
617         * @since 1.2
618         */
619        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
620        /**
621         * The &quot;Latin Extended-A&quot; Unicode Block.
622         *
623         * @since 1.2
624         */
625        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
626        /**
627         * The &quot;Latin Extended-B&quot; Unicode Block.
628         *
629         * @since 1.2
630         */
631        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
632        /**
633         * The &quot;IPA Extensions&quot; Unicode Block.
634         *
635         * @since 1.2
636         */
637        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
638        /**
639         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
640         *
641         * @since 1.2
642         */
643        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
644        /**
645         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
646         *
647         * @since 1.2
648         */
649        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
650        /**
651         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
652         * to as &quot;Greek&quot;.
653         *
654         * @since 1.2
655         */
656        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
657        /**
658         * The &quot;Cyrillic&quot; Unicode Block.
659         *
660         * @since 1.2
661         */
662        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
663        /**
664         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
665         * referred to as &quot;Cyrillic Supplementary&quot;.
666         *
667         * @since 1.5
668         */
669        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
670        /**
671         * The &quot;Armenian&quot; Unicode Block.
672         *
673         * @since 1.2
674         */
675        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
676        /**
677         * The &quot;Hebrew&quot; Unicode Block.
678         *
679         * @since 1.2
680         */
681        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
682        /**
683         * The &quot;Arabic&quot; Unicode Block.
684         *
685         * @since 1.2
686         */
687        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
688        /**
689         * The &quot;Syriac&quot; Unicode Block.
690         *
691         * @since 1.4
692         */
693        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
694        /**
695         * The &quot;Thaana&quot; Unicode Block.
696         *
697         * @since 1.4
698         */
699        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
700        /**
701         * The &quot;Devanagari&quot; Unicode Block.
702         *
703         * @since 1.2
704         */
705        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
706        /**
707         * The &quot;Bengali&quot; Unicode Block.
708         *
709         * @since 1.2
710         */
711        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
712        /**
713         * The &quot;Gurmukhi&quot; Unicode Block.
714         *
715         * @since 1.2
716         */
717        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
718        /**
719         * The &quot;Gujarati&quot; Unicode Block.
720         *
721         * @since 1.2
722         */
723        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
724        /**
725         * The &quot;Oriya&quot; Unicode Block.
726         *
727         * @since 1.2
728         */
729        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
730        /**
731         * The &quot;Tamil&quot; Unicode Block.
732         *
733         * @since 1.2
734         */
735        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
736        /**
737         * The &quot;Telugu&quot; Unicode Block.
738         *
739         * @since 1.2
740         */
741        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
742        /**
743         * The &quot;Kannada&quot; Unicode Block.
744         *
745         * @since 1.2
746         */
747        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
748        /**
749         * The &quot;Malayalam&quot; Unicode Block.
750         *
751         * @since 1.2
752         */
753        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
754        /**
755         * The &quot;Sinhala&quot; Unicode Block.
756         *
757         * @since 1.4
758         */
759        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
760        /**
761         * The &quot;Thai&quot; Unicode Block.
762         *
763         * @since 1.2
764         */
765        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
766        /**
767         * The &quot;Lao&quot; Unicode Block.
768         *
769         * @since 1.2
770         */
771        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
772        /**
773         * The &quot;Tibetan&quot; Unicode Block.
774         *
775         * @since 1.2
776         */
777        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
778        /**
779         * The &quot;Myanmar&quot; Unicode Block.
780         *
781         * @since 1.4
782         */
783        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
784        /**
785         * The &quot;Georgian&quot; Unicode Block.
786         *
787         * @since 1.2
788         */
789        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
790        /**
791         * The &quot;Hangul Jamo&quot; Unicode Block.
792         *
793         * @since 1.2
794         */
795        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
796        /**
797         * The &quot;Ethiopic&quot; Unicode Block.
798         *
799         * @since 1.4
800         */
801        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
802        /**
803         * The &quot;Cherokee&quot; Unicode Block.
804         *
805         * @since 1.4
806         */
807        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
808        /**
809         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
810         *
811         * @since 1.4
812         */
813        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
814        /**
815         * The &quot;Ogham&quot; Unicode Block.
816         *
817         * @since 1.4
818         */
819        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
820        /**
821         * The &quot;Runic&quot; Unicode Block.
822         *
823         * @since 1.4
824         */
825        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
826        /**
827         * The &quot;Tagalog&quot; Unicode Block.
828         *
829         * @since 1.5
830         */
831        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
832        /**
833         * The &quot;Hanunoo&quot; Unicode Block.
834         *
835         * @since 1.5
836         */
837        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
838        /**
839         * The &quot;Buhid&quot; Unicode Block.
840         *
841         * @since 1.5
842         */
843        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
844        /**
845         * The &quot;Tagbanwa&quot; Unicode Block.
846         *
847         * @since 1.5
848         */
849        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
850        /**
851         * The &quot;Khmer&quot; Unicode Block.
852         *
853         * @since 1.4
854         */
855        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
856        /**
857         * The &quot;Mongolian&quot; Unicode Block.
858         *
859         * @since 1.4
860         */
861        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
862        /**
863         * The &quot;Limbu&quot; Unicode Block.
864         *
865         * @since 1.5
866         */
867        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
868        /**
869         * The &quot;Tai Le&quot; Unicode Block.
870         *
871         * @since 1.5
872         */
873        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
874        /**
875         * The &quot;Khmer Symbols&quot; Unicode Block.
876         *
877         * @since 1.5
878         */
879        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
880        /**
881         * The &quot;Phonetic Extensions&quot; Unicode Block.
882         *
883         * @since 1.5
884         */
885        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
886        /**
887         * The &quot;Latin Extended Additional&quot; Unicode Block.
888         *
889         * @since 1.2
890         */
891        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
892        /**
893         * The &quot;Greek Extended&quot; Unicode Block.
894         *
895         * @since 1.2
896         */
897        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
898        /**
899         * The &quot;General Punctuation&quot; Unicode Block.
900         *
901         * @since 1.2
902         */
903        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
904        /**
905         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
906         *
907         * @since 1.2
908         */
909        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
910        /**
911         * The &quot;Currency Symbols&quot; Unicode Block.
912         *
913         * @since 1.2
914         */
915        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
916        /**
917         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
918         * Block. Previously referred to as &quot;Combining Marks for
919         * Symbols&quot;.
920         *
921         * @since 1.2
922         */
923        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
924        /**
925         * The &quot;Letterlike Symbols&quot; Unicode Block.
926         *
927         * @since 1.2
928         */
929        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
930        /**
931         * The &quot;Number Forms&quot; Unicode Block.
932         *
933         * @since 1.2
934         */
935        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
936        /**
937         * The &quot;Arrows&quot; Unicode Block.
938         *
939         * @since 1.2
940         */
941        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
942        /**
943         * The &quot;Mathematical Operators&quot; Unicode Block.
944         *
945         * @since 1.2
946         */
947        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
948        /**
949         * The &quot;Miscellaneous Technical&quot; Unicode Block.
950         *
951         * @since 1.2
952         */
953        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
954        /**
955         * The &quot;Control Pictures&quot; Unicode Block.
956         *
957         * @since 1.2
958         */
959        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
960        /**
961         * The &quot;Optical Character Recognition&quot; Unicode Block.
962         *
963         * @since 1.2
964         */
965        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
966        /**
967         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
968         *
969         * @since 1.2
970         */
971        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
972        /**
973         * The &quot;Box Drawing&quot; Unicode Block.
974         *
975         * @since 1.2
976         */
977        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
978        /**
979         * The &quot;Block Elements&quot; Unicode Block.
980         *
981         * @since 1.2
982         */
983        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
984        /**
985         * The &quot;Geometric Shapes&quot; Unicode Block.
986         *
987         * @since 1.2
988         */
989        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
990        /**
991         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
992         *
993         * @since 1.2
994         */
995        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
996        /**
997         * The &quot;Dingbats&quot; Unicode Block.
998         *
999         * @since 1.2
1000         */
1001        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
1002        /**
1003         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
1004         *
1005         * @since 1.5
1006         */
1007        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
1008        /**
1009         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
1010         *
1011         * @since 1.5
1012         */
1013        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
1014        /**
1015         * The &quot;Braille Patterns&quot; Unicode Block.
1016         *
1017         * @since 1.4
1018         */
1019        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1020        /**
1021         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1022         *
1023         * @since 1.5
1024         */
1025        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1026        /**
1027         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1028         *
1029         * @since 1.5
1030         */
1031        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1032        /**
1033         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1034         *
1035         * @since 1.5
1036         */
1037        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
1038        /**
1039         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
1040         *
1041         * @since 1.5
1042         */
1043        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1044        /**
1045         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1046         *
1047         * @since 1.4
1048         */
1049        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1050        /**
1051         * The &quot;Kangxi Radicals&quot; Unicode Block.
1052         *
1053         * @since 1.4
1054         */
1055        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1056        /**
1057         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1058         *
1059         * @since 1.4
1060         */
1061        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1062        /**
1063         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1064         *
1065         * @since 1.2
1066         */
1067        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1068        /**
1069         * The &quot;Hiragana&quot; Unicode Block.
1070         *
1071         * @since 1.2
1072         */
1073        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1074        /**
1075         * The &quot;Katakana&quot; Unicode Block.
1076         *
1077         * @since 1.2
1078         */
1079        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1080        /**
1081         * The &quot;Bopomofo&quot; Unicode Block.
1082         *
1083         * @since 1.2
1084         */
1085        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1086        /**
1087         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1088         *
1089         * @since 1.2
1090         */
1091        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1092        /**
1093         * The &quot;Kanbun&quot; Unicode Block.
1094         *
1095         * @since 1.2
1096         */
1097        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1098        /**
1099         * The &quot;Bopomofo Extended&quot; Unicode Block.
1100         *
1101         * @since 1.4
1102         */
1103        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1104        /**
1105         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1106         *
1107         * @since 1.5
1108         */
1109        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1110        /**
1111         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1112         *
1113         * @since 1.2
1114         */
1115        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1116        /**
1117         * The &quot;CJK Compatibility&quot; Unicode Block.
1118         *
1119         * @since 1.2
1120         */
1121        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1122        /**
1123         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1124         *
1125         * @since 1.4
1126         */
1127        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1128        /**
1129         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1130         *
1131         * @since 1.5
1132         */
1133        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1134        /**
1135         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1136         *
1137         * @since 1.2
1138         */
1139        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1140        /**
1141         * The &quot;Yi Syllables&quot; Unicode Block.
1142         *
1143         * @since 1.4
1144         */
1145        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1146        /**
1147         * The &quot;Yi Radicals&quot; Unicode Block.
1148         *
1149         * @since 1.4
1150         */
1151        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1152        /**
1153         * The &quot;Hangul Syllables&quot; Unicode Block.
1154         *
1155         * @since 1.2
1156         */
1157        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
1158        /**
1159         * The &quot;High Surrogates&quot; Unicode Block. This block represents
1160         * code point values in the high surrogate range 0xD800 to 0xDB7F.
1161         */
1162        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
1163        /**
1164         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
1165         * represents code point values in the high surrogate range 0xDB80 to
1166         * 0xDBFF.
1167         */
1168        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
1169        /**
1170         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
1171         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
1172         */
1173        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1174        /**
1175         * The &quot;Private Use Area&quot; Unicode Block.
1176         *
1177         * @since 1.2
1178         */
1179        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1180        /**
1181         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1182         *
1183         * @since 1.2
1184         */
1185        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1186        /**
1187         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1188         *
1189         * @since 1.2
1190         */
1191        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1192        /**
1193         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1194         *
1195         * @since 1.2
1196         */
1197        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1198        /**
1199         * The &quot;Variation Selectors&quot; Unicode Block.
1200         *
1201         * @since 1.5
1202         */
1203        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1204        /**
1205         * The &quot;Combining Half Marks&quot; Unicode Block.
1206         *
1207         * @since 1.2
1208         */
1209        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1210        /**
1211         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1212         *
1213         * @since 1.2
1214         */
1215        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1216        /**
1217         * The &quot;Small Form Variants&quot; Unicode Block.
1218         *
1219         * @since 1.2
1220         */
1221        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1222        /**
1223         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1224         *
1225         * @since 1.2
1226         */
1227        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1228        /**
1229         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1230         *
1231         * @since 1.2
1232         */
1233        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1234        /**
1235         * The &quot;Specials&quot; Unicode Block.
1236         *
1237         * @since 1.2
1238         */
1239        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
1240        /**
1241         * The &quot;Linear B Syllabary&quot; Unicode Block.
1242         *
1243         * @since 1.5
1244         */
1245        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1246        /**
1247         * The &quot;Linear B Ideograms&quot; Unicode Block.
1248         *
1249         * @since 1.5
1250         */
1251        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1252        /**
1253         * The &quot;Aegean Numbers&quot; Unicode Block.
1254         *
1255         * @since 1.5
1256         */
1257        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1258        /**
1259         * The &quot;Old Italic&quot; Unicode Block.
1260         *
1261         * @since 1.5
1262         */
1263        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1264        /**
1265         * The &quot;Gothic&quot; Unicode Block.
1266         *
1267         * @since 1.5
1268         */
1269        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1270        /**
1271         * The &quot;Ugaritic&quot; Unicode Block.
1272         *
1273         * @since 1.5
1274         */
1275        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1276        /**
1277         * The &quot;Deseret&quot; Unicode Block.
1278         *
1279         * @since 1.5
1280         */
1281        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1282        /**
1283         * The &quot;Shavian&quot; Unicode Block.
1284         *
1285         * @since 1.5
1286         */
1287        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1288        /**
1289         * The &quot;Osmanya&quot; Unicode Block.
1290         *
1291         * @since 1.5
1292         */
1293        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1294        /**
1295         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1296         *
1297         * @since 1.5
1298         */
1299        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1300        /**
1301         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1302         *
1303         * @since 1.5
1304         */
1305        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1306        /**
1307         * The &quot;Musical Symbols&quot; Unicode Block.
1308         *
1309         * @since 1.5
1310         */
1311        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1312        /**
1313         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1314         *
1315         * @since 1.5
1316         */
1317        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1318        /**
1319         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1320         *
1321         * @since 1.5
1322         */
1323        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1324        /**
1325         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1326         *
1327         * @since 1.5
1328         */
1329        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1330        /**
1331         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1332         *
1333         * @since 1.5
1334         */
1335        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1336        /**
1337         * The &quot;Tags&quot; Unicode Block.
1338         *
1339         * @since 1.5
1340         */
1341        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1342        /**
1343         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1344         *
1345         * @since 1.5
1346         */
1347        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1348        /**
1349         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1350         *
1351         * @since 1.5
1352         */
1353        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1354        /**
1355         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1356         *
1357         * @since 1.5
1358         */
1359        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1360
1361        /*
1362         * All of the UnicodeBlocks with valid ranges in ascending order.
1363         */
1364        private static UnicodeBlock[] BLOCKS;
1365
1366        // BEGIN android-changed
1367        // /*
1368        //  * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents
1369        //  * valid block names and values of the UnicodeBlock constant they map
1370        //  * to.
1371        //  */
1372        // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...;
1373        // END android-changed
1374
1375        /**
1376         * Retrieves the constant that corresponds to the specified block name.
1377         * The block names are defined by the Unicode 4.0.1 specification in the
1378         * {@code Blocks-4.0.1.txt} file.
1379         * <p>
1380         * Block names may be one of the following:
1381         * <ul>
1382         * <li>Canonical block name, as defined by the Unicode specification;
1383         * case-insensitive.</li>
1384         * <li>Canonical block name without any spaces, as defined by the
1385         * Unicode specification; case-insensitive.</li>
1386         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1387         * uppercasing the canonical name and replacing all spaces and hyphens
1388         * with underscores.</li>
1389         * </ul>
1390         *
1391         * @param blockName
1392         *            the name of the block to retrieve.
1393         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1394         * @throws NullPointerException
1395         *             if {@code blockName} is {@code null}.
1396         * @throws IllegalArgumentException
1397         *             if {@code blockName} is not a valid block name.
1398         * @since 1.5
1399         */
1400        public static final UnicodeBlock forName(String blockName) {
1401            // BEGIN android-note
1402            // trying to get closer to the RI which defines this as final.
1403            // END android-note
1404            if (blockName == null) {
1405                throw new NullPointerException();
1406            }
1407            // BEGIN android-changed
1408            if (BLOCKS == null) {
1409                BLOCKS = UCharacter.getBlockTable();
1410            }
1411            int block = UCharacter.forName(blockName);
1412            if (block == -1) {
1413                if(blockName.equals("SURROGATES_AREA")) {
1414                    return SURROGATES_AREA;
1415                } else if(blockName.equalsIgnoreCase("greek")) {
1416                    return GREEK;
1417                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1418                        blockName.equals("Combining Marks for Symbols") ||
1419                        blockName.equals("CombiningMarksforSymbols")) {
1420                    return COMBINING_MARKS_FOR_SYMBOLS;
1421                }
1422                throw new IllegalArgumentException();
1423            }
1424            return BLOCKS[block];
1425            // END android-changed
1426        }
1427
1428        /**
1429         * Gets the constant for the Unicode block that contains the specified
1430         * character.
1431         *
1432         * @param c
1433         *            the character for which to get the {@code UnicodeBlock}
1434         *            constant.
1435         * @return the {@code UnicodeBlock} constant for the block that contains
1436         *         {@code c}, or {@code null} if {@code c} does not belong to
1437         *         any defined block.
1438         */
1439        public static UnicodeBlock of(char c) {
1440            return of((int) c);
1441        }
1442
1443        /**
1444         * Gets the constant for the Unicode block that contains the specified
1445         * Unicode code point.
1446         *
1447         * @param codePoint
1448         *            the Unicode code point for which to get the
1449         *            {@code UnicodeBlock} constant.
1450         * @return the {@code UnicodeBlock} constant for the block that contains
1451         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1452         *         not belong to any defined block.
1453         * @throws IllegalArgumentException
1454         *             if {@code codePoint} is not a valid Unicode code point.
1455         * @since 1.5
1456         */
1457        public static UnicodeBlock of(int codePoint) {
1458            if (!isValidCodePoint(codePoint)) {
1459                throw new IllegalArgumentException();
1460            }
1461            // BEGIN android-changed
1462            if (BLOCKS == null) {
1463                BLOCKS = UCharacter.getBlockTable();
1464            }
1465            int block = UCharacter.of(codePoint);
1466            if(block == -1 || block >= BLOCKS.length) {
1467                return null;
1468            }
1469            return BLOCKS[block];
1470            // END android-changed
1471        }
1472
1473        // BEGIN android-changed
1474        private UnicodeBlock(String blockName, int start, int end) {
1475            super(blockName);
1476        }
1477        // END android-changed
1478    }
1479
1480    /**
1481     * Constructs a new {@code Character} with the specified primitive char
1482     * value.
1483     *
1484     * @param value
1485     *            the primitive char value to store in the new instance.
1486     */
1487    public Character(char value) {
1488        this.value = value;
1489    }
1490
1491    /**
1492     * Gets the primitive value of this character.
1493     *
1494     * @return this object's primitive value.
1495     */
1496    public char charValue() {
1497        return value;
1498    }
1499
1500    /**
1501     * Compares this object to the specified character object to determine their
1502     * relative order.
1503     *
1504     * @param c
1505     *            the character object to compare this object to.
1506     * @return {@code 0} if the value of this character and the value of
1507     *         {@code c} are equal; a positive value if the value of this
1508     *         character is greater than the value of {@code c}; a negative
1509     *         value if the value of this character is less than the value of
1510     *         {@code c}.
1511     * @see java.lang.Comparable
1512     * @since 1.2
1513     */
1514    public int compareTo(Character c) {
1515        return value - c.value;
1516    }
1517
1518    /**
1519     * Returns a {@code Character} instance for the {@code char} value passed.
1520     * For ASCII/Latin-1 characters (and generally all characters with a Unicode
1521     * value up to 512), this method should be used instead of the constructor,
1522     * as it maintains a cache of corresponding {@code Character} instances.
1523     *
1524     * @param c
1525     *            the char value for which to get a {@code Character} instance.
1526     * @return the {@code Character} instance for {@code c}.
1527     * @since 1.5
1528     */
1529    public static Character valueOf(char c) {
1530        if (c >= CACHE_LEN ) {
1531            return new Character(c);
1532        }
1533        return valueOfCache.CACHE[c];
1534    }
1535
1536    private static final int CACHE_LEN = 512;
1537
1538    static class valueOfCache {
1539        /*
1540        * Provides a cache for the 'valueOf' method. A size of 512 should cache the
1541        * first couple pages of Unicode, which includes the ASCII/Latin-1
1542        * characters, which other parts of this class are optimized for.
1543        */
1544        private static final Character[] CACHE = new Character[CACHE_LEN ];
1545
1546        static {
1547            for(int i=0; i<CACHE.length; i++){
1548                CACHE[i] =  new Character((char)i);
1549            }
1550        }
1551    }
1552    /**
1553     * Indicates whether {@code codePoint} is a valid Unicode code point.
1554     *
1555     * @param codePoint
1556     *            the code point to test.
1557     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1558     *         {@code false} otherwise.
1559     * @since 1.5
1560     */
1561    public static boolean isValidCodePoint(int codePoint) {
1562        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1563    }
1564
1565    /**
1566     * Indicates whether {@code codePoint} is within the supplementary code
1567     * point range.
1568     *
1569     * @param codePoint
1570     *            the code point to test.
1571     * @return {@code true} if {@code codePoint} is within the supplementary
1572     *         code point range; {@code false} otherwise.
1573     * @since 1.5
1574     */
1575    public static boolean isSupplementaryCodePoint(int codePoint) {
1576        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1577    }
1578
1579    /**
1580     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1581     * that is used for representing supplementary characters in UTF-16
1582     * encoding.
1583     *
1584     * @param ch
1585     *            the character to test.
1586     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1587     *         {@code false} otherwise.
1588     * @see #isLowSurrogate(char)
1589     * @since 1.5
1590     */
1591    public static boolean isHighSurrogate(char ch) {
1592        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1593    }
1594
1595    /**
1596     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1597     * that is used for representing supplementary characters in UTF-16
1598     * encoding.
1599     *
1600     * @param ch
1601     *            the character to test.
1602     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1603     *         {@code false} otherwise.
1604     * @see #isHighSurrogate(char)
1605     * @since 1.5
1606     */
1607    public static boolean isLowSurrogate(char ch) {
1608        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1609    }
1610
1611    /**
1612     * Indicates whether the specified character pair is a valid surrogate pair.
1613     *
1614     * @param high
1615     *            the high surrogate unit to test.
1616     * @param low
1617     *            the low surrogate unit to test.
1618     * @return {@code true} if {@code high} is a high-surrogate code unit and
1619     *         {@code low} is a low-surrogate code unit; {@code false}
1620     *         otherwise.
1621     * @see #isHighSurrogate(char)
1622     * @see #isLowSurrogate(char)
1623     * @since 1.5
1624     */
1625    public static boolean isSurrogatePair(char high, char low) {
1626        return (isHighSurrogate(high) && isLowSurrogate(low));
1627    }
1628
1629    /**
1630     * Calculates the number of {@code char} values required to represent the
1631     * specified Unicode code point. This method checks if the {@code codePoint}
1632     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1633     * returned, otherwise {@code 1}. To test if the code point is valid, use
1634     * the {@link #isValidCodePoint(int)} method.
1635     *
1636     * @param codePoint
1637     *            the code point for which to calculate the number of required
1638     *            chars.
1639     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1640     * @see #isValidCodePoint(int)
1641     * @see #isSupplementaryCodePoint(int)
1642     * @since 1.5
1643     */
1644    public static int charCount(int codePoint) {
1645        return (codePoint >= 0x10000 ? 2 : 1);
1646    }
1647
1648    /**
1649     * Converts a surrogate pair into a Unicode code point. This method assumes
1650     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1651     * surrogates, then the result is indeterminate. The
1652     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1653     * method to validate the pair.
1654     *
1655     * @param high
1656     *            the high surrogate unit.
1657     * @param low
1658     *            the low surrogate unit.
1659     * @return the Unicode code point corresponding to the surrogate unit pair.
1660     * @see #isSurrogatePair(char, char)
1661     * @since 1.5
1662     */
1663    public static int toCodePoint(char high, char low) {
1664        // See RFC 2781, Section 2.2
1665        // http://www.faqs.org/rfcs/rfc2781.html
1666        int h = (high & 0x3FF) << 10;
1667        int l = low & 0x3FF;
1668        return (h | l) + 0x10000;
1669    }
1670
1671    /**
1672     * Returns the code point at {@code index} in the specified sequence of
1673     * character units. If the unit at {@code index} is a high-surrogate unit,
1674     * {@code index + 1} is less than the length of the sequence and the unit at
1675     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1676     * point represented by the pair is returned; otherwise the {@code char}
1677     * value at {@code index} is returned.
1678     *
1679     * @param seq
1680     *            the source sequence of {@code char} units.
1681     * @param index
1682     *            the position in {@code seq} from which to retrieve the code
1683     *            point.
1684     * @return the Unicode code point or {@code char} value at {@code index} in
1685     *         {@code seq}.
1686     * @throws NullPointerException
1687     *             if {@code seq} is {@code null}.
1688     * @throws IndexOutOfBoundsException
1689     *             if the {@code index} is negative or greater than or equal to
1690     *             the length of {@code seq}.
1691     * @since 1.5
1692     */
1693    public static int codePointAt(CharSequence seq, int index) {
1694        if (seq == null) {
1695            throw new NullPointerException();
1696        }
1697        int len = seq.length();
1698        if (index < 0 || index >= len) {
1699            throw new IndexOutOfBoundsException();
1700        }
1701
1702        char high = seq.charAt(index++);
1703        if (index >= len) {
1704            return high;
1705        }
1706        char low = seq.charAt(index);
1707        if (isSurrogatePair(high, low)) {
1708            return toCodePoint(high, low);
1709        }
1710        return high;
1711    }
1712
1713    /**
1714     * Returns the code point at {@code index} in the specified array of
1715     * character units. If the unit at {@code index} is a high-surrogate unit,
1716     * {@code index + 1} is less than the length of the array and the unit at
1717     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1718     * point represented by the pair is returned; otherwise the {@code char}
1719     * value at {@code index} is returned.
1720     *
1721     * @param seq
1722     *            the source array of {@code char} units.
1723     * @param index
1724     *            the position in {@code seq} from which to retrieve the code
1725     *            point.
1726     * @return the Unicode code point or {@code char} value at {@code index} in
1727     *         {@code seq}.
1728     * @throws NullPointerException
1729     *             if {@code seq} is {@code null}.
1730     * @throws IndexOutOfBoundsException
1731     *             if the {@code index} is negative or greater than or equal to
1732     *             the length of {@code seq}.
1733     * @since 1.5
1734     */
1735    public static int codePointAt(char[] seq, int index) {
1736        if (seq == null) {
1737            throw new NullPointerException();
1738        }
1739        int len = seq.length;
1740        if (index < 0 || index >= len) {
1741            throw new IndexOutOfBoundsException();
1742        }
1743
1744        char high = seq[index++];
1745        if (index >= len) {
1746            return high;
1747        }
1748        char low = seq[index];
1749        if (isSurrogatePair(high, low)) {
1750            return toCodePoint(high, low);
1751        }
1752        return high;
1753    }
1754
1755    /**
1756     * Returns the code point at {@code index} in the specified array of
1757     * character units, where {@code index} has to be less than {@code limit}.
1758     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1759     * is less than {@code limit} and the unit at {@code index + 1} is a
1760     * low-surrogate unit, then the supplementary code point represented by the
1761     * pair is returned; otherwise the {@code char} value at {@code index} is
1762     * returned.
1763     *
1764     * @param seq
1765     *            the source array of {@code char} units.
1766     * @param index
1767     *            the position in {@code seq} from which to get the code point.
1768     * @param limit
1769     *            the index after the last unit in {@code seq} that can be used.
1770     * @return the Unicode code point or {@code char} value at {@code index} in
1771     *         {@code seq}.
1772     * @throws NullPointerException
1773     *             if {@code seq} is {@code null}.
1774     * @throws IndexOutOfBoundsException
1775     *             if {@code index < 0}, {@code index >= limit},
1776     *             {@code limit < 0} or if {@code limit} is greater than the
1777     *             length of {@code seq}.
1778     * @since 1.5
1779     */
1780    public static int codePointAt(char[] seq, int index, int limit) {
1781        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1782            throw new IndexOutOfBoundsException();
1783        }
1784
1785        char high = seq[index++];
1786        if (index >= limit) {
1787            return high;
1788        }
1789        char low = seq[index];
1790        if (isSurrogatePair(high, low)) {
1791            return toCodePoint(high, low);
1792        }
1793        return high;
1794    }
1795
1796    /**
     * Returns the code point that precedes {@code index} in the specified
1798     * sequence of character units. If the unit at {@code index - 1} is a
1799     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1800     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1801     * point represented by the pair is returned; otherwise the {@code char}
1802     * value at {@code index - 1} is returned.
1803     *
1804     * @param seq
1805     *            the source sequence of {@code char} units.
1806     * @param index
1807     *            the position in {@code seq} following the code
1808     *            point that should be returned.
1809     * @return the Unicode code point or {@code char} value before {@code index}
1810     *         in {@code seq}.
1811     * @throws NullPointerException
1812     *             if {@code seq} is {@code null}.
1813     * @throws IndexOutOfBoundsException
1814     *             if the {@code index} is less than 1 or greater than the
1815     *             length of {@code seq}.
1816     * @since 1.5
1817     */
1818    public static int codePointBefore(CharSequence seq, int index) {
1819        if (seq == null) {
1820            throw new NullPointerException();
1821        }
1822        int len = seq.length();
1823        if (index < 1 || index > len) {
1824            throw new IndexOutOfBoundsException();
1825        }
1826
1827        char low = seq.charAt(--index);
1828        if (--index < 0) {
1829            return low;
1830        }
1831        char high = seq.charAt(index);
1832        if (isSurrogatePair(high, low)) {
1833            return toCodePoint(high, low);
1834        }
1835        return low;
1836    }
1837
1838    /**
     * Returns the code point that precedes {@code index} in the specified
1840     * array of character units. If the unit at {@code index - 1} is a
1841     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1842     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1843     * point represented by the pair is returned; otherwise the {@code char}
1844     * value at {@code index - 1} is returned.
1845     *
1846     * @param seq
1847     *            the source array of {@code char} units.
1848     * @param index
1849     *            the position in {@code seq} following the code
1850     *            point that should be returned.
1851     * @return the Unicode code point or {@code char} value before {@code index}
1852     *         in {@code seq}.
1853     * @throws NullPointerException
1854     *             if {@code seq} is {@code null}.
1855     * @throws IndexOutOfBoundsException
1856     *             if the {@code index} is less than 1 or greater than the
1857     *             length of {@code seq}.
1858     * @since 1.5
1859     */
1860    public static int codePointBefore(char[] seq, int index) {
1861        if (seq == null) {
1862            throw new NullPointerException();
1863        }
1864        int len = seq.length;
1865        if (index < 1 || index > len) {
1866            throw new IndexOutOfBoundsException();
1867        }
1868
1869        char low = seq[--index];
1870        if (--index < 0) {
1871            return low;
1872        }
1873        char high = seq[index];
1874        if (isSurrogatePair(high, low)) {
1875            return toCodePoint(high, low);
1876        }
1877        return low;
1878    }
1879
1880    /**
     * Returns the code point that precedes the {@code index} in the specified
1882     * array of character units and is not less than {@code start}. If the unit
1883     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1884     * less than {@code start} and the unit at {@code index - 2} is a
1885     * high-surrogate unit, then the supplementary code point represented by the
1886     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1887     * is returned.
1888     *
1889     * @param seq
1890     *            the source array of {@code char} units.
1891     * @param index
1892     *            the position in {@code seq} following the code point that
1893     *            should be returned.
1894     * @param start
1895     *            the index of the first element in {@code seq}.
1896     * @return the Unicode code point or {@code char} value before {@code index}
1897     *         in {@code seq}.
1898     * @throws NullPointerException
1899     *             if {@code seq} is {@code null}.
1900     * @throws IndexOutOfBoundsException
1901     *             if the {@code index <= start}, {@code start < 0},
1902     *             {@code index} is greater than the length of {@code seq}, or
1903     *             if {@code start} is equal or greater than the length of
1904     *             {@code seq}.
1905     * @since 1.5
1906     */
1907    public static int codePointBefore(char[] seq, int index, int start) {
1908        if (seq == null) {
1909            throw new NullPointerException();
1910        }
1911        int len = seq.length;
1912        if (index <= start || index > len || start < 0 || start >= len) {
1913            throw new IndexOutOfBoundsException();
1914        }
1915
1916        char low = seq[--index];
1917        if (--index < start) {
1918            return low;
1919        }
1920        char high = seq[index];
1921        if (isSurrogatePair(high, low)) {
1922            return toCodePoint(high, low);
1923        }
1924        return low;
1925    }
1926
1927    /**
1928     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1929     * and copies the value(s) into the char array {@code dst}, starting at
1930     * index {@code dstIndex}.
1931     *
1932     * @param codePoint
1933     *            the Unicode code point to encode.
1934     * @param dst
1935     *            the destination array to copy the encoded value into.
1936     * @param dstIndex
1937     *            the index in {@code dst} from where to start copying.
1938     * @return the number of {@code char} value units copied into {@code dst}.
1939     * @throws IllegalArgumentException
1940     *             if {@code codePoint} is not a valid Unicode code point.
1941     * @throws NullPointerException
1942     *             if {@code dst} is {@code null}.
1943     * @throws IndexOutOfBoundsException
1944     *             if {@code dstIndex} is negative, greater than or equal to
1945     *             {@code dst.length} or equals {@code dst.length - 1} when
1946     *             {@code codePoint} is a
1947     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
1948     * @since 1.5
1949     */
1950    public static int toChars(int codePoint, char[] dst, int dstIndex) {
1951        if (!isValidCodePoint(codePoint)) {
1952            throw new IllegalArgumentException();
1953        }
1954        if (dst == null) {
1955            throw new NullPointerException();
1956        }
1957        if (dstIndex < 0 || dstIndex >= dst.length) {
1958            throw new IndexOutOfBoundsException();
1959        }
1960
1961        if (isSupplementaryCodePoint(codePoint)) {
1962            if (dstIndex == dst.length - 1) {
1963                throw new IndexOutOfBoundsException();
1964            }
1965            // See RFC 2781, Section 2.1
1966            // http://www.faqs.org/rfcs/rfc2781.html
1967            int cpPrime = codePoint - 0x10000;
1968            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
1969            int low = 0xDC00 | (cpPrime & 0x3FF);
1970            dst[dstIndex] = (char) high;
1971            dst[dstIndex + 1] = (char) low;
1972            return 2;
1973        }
1974
1975        dst[dstIndex] = (char) codePoint;
1976        return 1;
1977    }
1978
1979    /**
1980     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1981     * and returns it as a char array.
1982     *
1983     * @param codePoint
1984     *            the Unicode code point to encode.
1985     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
1986     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
1987     *         then the returned array contains two characters, otherwise it
1988     *         contains just one character.
1989     * @throws IllegalArgumentException
1990     *             if {@code codePoint} is not a valid Unicode code point.
1991     * @since 1.5
1992     */
1993    public static char[] toChars(int codePoint) {
1994        if (!isValidCodePoint(codePoint)) {
1995            throw new IllegalArgumentException();
1996        }
1997
1998        if (isSupplementaryCodePoint(codePoint)) {
1999            int cpPrime = codePoint - 0x10000;
2000            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2001            int low = 0xDC00 | (cpPrime & 0x3FF);
2002            return new char[] { (char) high, (char) low };
2003        }
2004        return new char[] { (char) codePoint };
2005    }
2006
2007    /**
2008     * Counts the number of Unicode code points in the subsequence of the
2009     * specified character sequence, as delineated by {@code beginIndex} and
2010     * {@code endIndex}. Any surrogate values with missing pair values will be
2011     * counted as one code point.
2012     *
2013     * @param seq
2014     *            the {@code CharSequence} to look through.
2015     * @param beginIndex
2016     *            the inclusive index to begin counting at.
2017     * @param endIndex
2018     *            the exclusive index to stop counting at.
2019     * @return the number of Unicode code points.
2020     * @throws NullPointerException
2021     *             if {@code seq} is {@code null}.
2022     * @throws IndexOutOfBoundsException
2023     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2024     *             if {@code endIndex} is greater than the length of {@code seq}.
2025     * @since 1.5
2026     */
2027    public static int codePointCount(CharSequence seq, int beginIndex,
2028            int endIndex) {
2029        if (seq == null) {
2030            throw new NullPointerException();
2031        }
2032        int len = seq.length();
2033        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2034            throw new IndexOutOfBoundsException();
2035        }
2036
2037        int result = 0;
2038        for (int i = beginIndex; i < endIndex; i++) {
2039            char c = seq.charAt(i);
2040            if (isHighSurrogate(c)) {
2041                if (++i < endIndex) {
2042                    c = seq.charAt(i);
2043                    if (!isLowSurrogate(c)) {
2044                        result++;
2045                    }
2046                }
2047            }
2048            result++;
2049        }
2050        return result;
2051    }
2052
2053    /**
2054     * Counts the number of Unicode code points in the subsequence of the
2055     * specified char array, as delineated by {@code offset} and {@code count}.
2056     * Any surrogate values with missing pair values will be counted as one code
2057     * point.
2058     *
2059     * @param seq
2060     *            the char array to look through
2061     * @param offset
2062     *            the inclusive index to begin counting at.
2063     * @param count
2064     *            the number of {@code char} values to look through in
2065     *            {@code seq}.
2066     * @return the number of Unicode code points.
2067     * @throws NullPointerException
2068     *             if {@code seq} is {@code null}.
2069     * @throws IndexOutOfBoundsException
2070     *             if {@code offset < 0}, {@code count < 0} or if
2071     *             {@code offset + count} is greater than the length of
2072     *             {@code seq}.
2073     * @since 1.5
2074     */
2075    public static int codePointCount(char[] seq, int offset, int count) {
2076        if (seq == null) {
2077            throw new NullPointerException();
2078        }
2079        int len = seq.length;
2080        int endIndex = offset + count;
2081        if (offset < 0 || count < 0 || endIndex > len) {
2082            throw new IndexOutOfBoundsException();
2083        }
2084
2085        int result = 0;
2086        for (int i = offset; i < endIndex; i++) {
2087            char c = seq[i];
2088            if (isHighSurrogate(c)) {
2089                if (++i < endIndex) {
2090                    c = seq[i];
2091                    if (!isLowSurrogate(c)) {
2092                        result++;
2093                    }
2094                }
2095            }
2096            result++;
2097        }
2098        return result;
2099    }
2100
2101    /**
2102     * Determines the index in the specified character sequence that is offset
2103     * {@code codePointOffset} code points from {@code index}.
2104     *
2105     * @param seq
2106     *            the character sequence to find the index in.
2107     * @param index
2108     *            the start index in {@code seq}.
2109     * @param codePointOffset
2110     *            the number of code points to look backwards or forwards; may
2111     *            be a negative or positive value.
2112     * @return the index in {@code seq} that is {@code codePointOffset} code
2113     *         points away from {@code index}.
2114     * @throws NullPointerException
2115     *             if {@code seq} is {@code null}.
2116     * @throws IndexOutOfBoundsException
2117     *             if {@code index < 0}, {@code index} is greater than the
2118     *             length of {@code seq}, or if there are not enough values in
2119     *             {@code seq} to skip {@code codePointOffset} code points
2120     *             forwards or backwards (if {@code codePointOffset} is
2121     *             negative) from {@code index}.
2122     * @since 1.5
2123     */
2124    public static int offsetByCodePoints(CharSequence seq, int index,
2125            int codePointOffset) {
2126        if (seq == null) {
2127            throw new NullPointerException();
2128        }
2129        int len = seq.length();
2130        if (index < 0 || index > len) {
2131            throw new IndexOutOfBoundsException();
2132        }
2133
2134        if (codePointOffset == 0) {
2135            return index;
2136        }
2137
2138        if (codePointOffset > 0) {
2139            int codePoints = codePointOffset;
2140            int i = index;
2141            while (codePoints > 0) {
2142                codePoints--;
2143                if (i >= len) {
2144                    throw new IndexOutOfBoundsException();
2145                }
2146                if (isHighSurrogate(seq.charAt(i))) {
2147                    int next = i + 1;
2148                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2149                        i++;
2150                    }
2151                }
2152                i++;
2153            }
2154            return i;
2155        }
2156
2157        assert codePointOffset < 0;
2158        int codePoints = -codePointOffset;
2159        int i = index;
2160        while (codePoints > 0) {
2161            codePoints--;
2162            i--;
2163            if (i < 0) {
2164                throw new IndexOutOfBoundsException();
2165            }
2166            if (isLowSurrogate(seq.charAt(i))) {
2167                int prev = i - 1;
2168                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2169                    i--;
2170                }
2171            }
2172        }
2173        return i;
2174    }
2175
2176    /**
2177     * Determines the index in a subsequence of the specified character array
2178     * that is offset {@code codePointOffset} code points from {@code index}.
2179     * The subsequence is delineated by {@code start} and {@code count}.
2180     *
2181     * @param seq
2182     *            the character array to find the index in.
2183     * @param start
2184     *            the inclusive index that marks the beginning of the
2185     *            subsequence.
2186     * @param count
2187     *            the number of {@code char} values to include within the
2188     *            subsequence.
2189     * @param index
2190     *            the start index in the subsequence of the char array.
2191     * @param codePointOffset
2192     *            the number of code points to look backwards or forwards; may
2193     *            be a negative or positive value.
2194     * @return the index in {@code seq} that is {@code codePointOffset} code
2195     *         points away from {@code index}.
2196     * @throws NullPointerException
2197     *             if {@code seq} is {@code null}.
2198     * @throws IndexOutOfBoundsException
2199     *             if {@code start < 0}, {@code count < 0},
2200     *             {@code index < start}, {@code index > start + count},
2201     *             {@code start + count} is greater than the length of
2202     *             {@code seq}, or if there are not enough values in
2203     *             {@code seq} to skip {@code codePointOffset} code points
2204     *             forward or backward (if {@code codePointOffset} is
2205     *             negative) from {@code index}.
2206     * @since 1.5
2207     */
2208    public static int offsetByCodePoints(char[] seq, int start, int count,
2209            int index, int codePointOffset) {
2210        if (seq == null) {
2211            throw new NullPointerException();
2212        }
2213        int end = start + count;
2214        if (start < 0 || count < 0 || end > seq.length || index < start
2215                || index > end) {
2216            throw new IndexOutOfBoundsException();
2217        }
2218
2219        if (codePointOffset == 0) {
2220            return index;
2221        }
2222
2223        if (codePointOffset > 0) {
2224            int codePoints = codePointOffset;
2225            int i = index;
2226            while (codePoints > 0) {
2227                codePoints--;
2228                if (i >= end) {
2229                    throw new IndexOutOfBoundsException();
2230                }
2231                if (isHighSurrogate(seq[i])) {
2232                    int next = i + 1;
2233                    if (next < end && isLowSurrogate(seq[next])) {
2234                        i++;
2235                    }
2236                }
2237                i++;
2238            }
2239            return i;
2240        }
2241
2242        assert codePointOffset < 0;
2243        int codePoints = -codePointOffset;
2244        int i = index;
2245        while (codePoints > 0) {
2246            codePoints--;
2247            i--;
2248            if (i < start) {
2249                throw new IndexOutOfBoundsException();
2250            }
2251            if (isLowSurrogate(seq[i])) {
2252                int prev = i - 1;
2253                if (prev >= start && isHighSurrogate(seq[prev])) {
2254                    i--;
2255                }
2256            }
2257        }
2258        return i;
2259    }
2260
2261    /**
2262     * Convenience method to determine the value of the specified character
2263     * {@code c} in the supplied radix. The value of {@code radix} must be
2264     * between MIN_RADIX and MAX_RADIX.
2265     *
2266     * @param c
2267     *            the character to determine the value of.
2268     * @param radix
2269     *            the radix.
2270     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2271     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2272     */
2273    public static int digit(char c, int radix) {
2274        // BEGIN android-changed
2275        // if (radix >= MIN_RADIX && radix <= MAX_RADIX) {
2276        //     if (c < 128) {
2277        //         // Optimized for ASCII
2278        //         int result = -1;
2279        //         if ('0' <= c && c <= '9') {
2280        //             result = c - '0';
2281        //         } else if ('a' <= c && c <= 'z') {
2282        //             result = c - ('a' - 10);
2283        //         } else if ('A' <= c && c <= 'Z') {
2284        //             result = c - ('A' - 10);
2285        //         }
2286        //         return result < radix ? result : -1;
2287        //     }
2288        //     int result = BinarySearch.binarySearchRange(digitKeys, c);
2289        //     if (result >= 0 && c <= digitValues[result * 2]) {
2290        //         int value = (char) (c - digitValues[result * 2 + 1]);
2291        //         if (value >= radix) {
2292        //             return -1;
2293        //         }
2294        //         return value;
2295        //     }
2296        // }
2297        // return -1;
2298        return UCharacter.digit(c, radix);
2299        // ENd android-changed
2300    }
2301
2302    /**
2303     * Convenience method to determine the value of the character
2304     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2305     * be between MIN_RADIX and MAX_RADIX.
2306     *
2307     * @param codePoint
2308     *            the character, including supplementary characters.
2309     * @param radix
2310     *            the radix.
2311     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2312     *         {@link #MAX_RADIX} then the value of the character in the radix;
2313     *         -1 otherwise.
2314     */
2315    public static int digit(int codePoint, int radix) {
2316        return UCharacter.digit(codePoint, radix);
2317    }
2318
2319    /**
2320     * Compares this object with the specified object and indicates if they are
2321     * equal. In order to be equal, {@code object} must be an instance of
2322     * {@code Character} and have the same char value as this object.
2323     *
2324     * @param object
     *            the object to compare this character with.
2326     * @return {@code true} if the specified object is equal to this
2327     *         {@code Character}; {@code false} otherwise.
2328     */
2329    @Override
2330    public boolean equals(Object object) {
2331        return (object instanceof Character)
2332                && (value == ((Character) object).value);
2333    }
2334
2335    /**
2336     * Returns the character which represents the specified digit in the
2337     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2338     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2339     * smaller than {@code radix}. If any of these conditions does not hold, 0
2340     * is returned.
2341     *
2342     * @param digit
2343     *            the integer value.
2344     * @param radix
2345     *            the radix.
2346     * @return the character which represents the {@code digit} in the
2347     *         {@code radix}.
2348     */
2349    public static char forDigit(int digit, int radix) {
2350        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2351            if (0 <= digit && digit < radix) {
2352                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2353            }
2354        }
2355        return 0;
2356    }
2357
2358    /**
2359     * Gets the numeric value of the specified Unicode character.
2360     *
2361     * @param c
2362     *            the Unicode character to get the numeric value of.
2363     * @return a non-negative numeric integer value if a numeric value for
2364     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2365     *         -2 if the numeric value can not be represented with an integer.
2366     */
2367    public static int getNumericValue(char c) {
2368        // BEGIN android-changed
2369        // if (c < 128) {
2370        //     // Optimized for ASCII
2371        //     if (c >= '0' && c <= '9') {
2372        //         return c - '0';
2373        //     }
2374        //     if (c >= 'a' && c <= 'z') {
2375        //         return c - ('a' - 10);
2376        //     }
2377        //     if (c >= 'A' && c <= 'Z') {
2378        //         return c - ('A' - 10);
2379        //     }
2380        //     return -1;
2381        // }
2382        // int result = BinarySearch.binarySearchRange(numericKeys, c);
2383        // if (result >= 0 && c <= numericValues[result * 2]) {
2384        //     char difference = numericValues[result * 2 + 1];
2385        //     if (difference == 0) {
2386        //         return -2;
2387        //     }
2388        //     // Value is always positive, must be negative value
2389        //     if (difference > c) {
2390        //         return c - (short) difference;
2391        //     }
2392        //     return c - difference;
2393        // }
2394        // return -1;
2395        return UCharacter.getNumericValue(c);
2396        // END android-changed
2397    }
2398
2399    /**
2400     * Gets the numeric value of the specified Unicode code point. For example,
2401     * the code point '\u216B' stands for the Roman number XII, which has the
2402     * numeric value 12.
2403     *
2404     * @param codePoint
2405     *            the Unicode code point to get the numeric value of.
2406     * @return a non-negative numeric integer value if a numeric value for
2407     *         {@code codePoint} exists, -1 if there is no numeric value for
2408     *         {@code codePoint}, -2 if the numeric value can not be
2409     *         represented with an integer.
2410     */
2411    public static int getNumericValue(int codePoint) {
2412        return UCharacter.getNumericValue(codePoint);
2413    }
2414
2415    /**
2416     * Gets the general Unicode category of the specified character.
2417     *
2418     * @param c
2419     *            the character to get the category of.
2420     * @return the Unicode category of {@code c}.
2421     */
2422    public static int getType(char c) {
2423        // BEGIN android-changed
2424        // if(c < 1000) {
2425        //     return typeValuesCache[(int)c];
2426        // }
2427        // int result = BinarySearch.binarySearchRange(typeKeys, c);
2428        // int high = typeValues[result * 2];
2429        // if (c <= high) {
2430        //     int code = typeValues[result * 2 + 1];
2431        //     if (code < 0x100) {
2432        //         return code;
2433        //     }
2434        //     return (c & 1) == 1 ? code >> 8 : code & 0xff;
2435        // }
2436        // return UNASSIGNED;
2437        return getType((int) c);
2438        // END android-changed
2439    }
2440
2441    /**
2442     * Gets the general Unicode category of the specified code point.
2443     *
2444     * @param codePoint
2445     *            the Unicode code point to get the category of.
2446     * @return the Unicode category of {@code codePoint}.
2447     */
2448    public static int getType(int codePoint) {
2449        // BEGIN android-changed
2450    	// if (codePoint < 1000 && codePoint > 0) {
2451    	//     return typeValuesCache[codePoint];
2452    	// }
2453        // END android-changed
2454        int type = UCharacter.getType(codePoint);
2455
2456        // the type values returned by UCharacter are not compatible with what
2457        // the spec says.RI's Character type values skip the value 17.
2458        if (type <= Character.FORMAT) {
2459            return type;
2460        }
2461        return (type + 1);
2462    }
2463
2464    /**
2465     * Gets the Unicode directionality of the specified character.
2466     *
2467     * @param c
2468     *            the character to get the directionality of.
2469     * @return the Unicode directionality of {@code c}.
2470     */
2471    public static byte getDirectionality(char c) {
2472        // BEGIN android-changed
2473        // int result = BinarySearch.binarySearchRange(bidiKeys, c);
2474        // int high = bidiValues[result * 2];
2475        // if (c <= high) {
2476        //     int code = bidiValues[result * 2 + 1];
2477        //     if (code < 0x100) {
2478        //         return (byte) (code - 1);
2479        //     }
2480        //     return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1);
2481        // }
2482        // return DIRECTIONALITY_UNDEFINED;
2483        return getDirectionality((int)c);
2484        // END android-changed
2485    }
2486
2487    /**
2488     * Gets the Unicode directionality of the specified character.
2489     *
2490     * @param codePoint
2491     *            the Unicode code point to get the directionality of.
2492     * @return the Unicode directionality of {@code codePoint}.
2493     */
2494    public static byte getDirectionality(int codePoint) {
2495        if (getType(codePoint) == Character.UNASSIGNED) {
2496            return Character.DIRECTIONALITY_UNDEFINED;
2497        }
2498
2499        byte UCDirectionality = UCharacter.getDirectionality(codePoint);
2500        if (UCDirectionality == -1) {
2501            return -1;
2502        }
2503        return DIRECTIONALITY[UCDirectionality];
2504    }
2505
2506    /**
2507     * Indicates whether the specified character is mirrored.
2508     *
2509     * @param c
2510     *            the character to check.
2511     * @return {@code true} if {@code c} is mirrored; {@code false}
2512     *         otherwise.
2513     */
2514    public static boolean isMirrored(char c) {
2515        // BEGIN android-changed
2516        // int value = c / 16;
2517        // if (value >= mirrored.length) {
2518        //     return false;
2519        // }
2520        // int bit = 1 << (c % 16);
2521        // return (mirrored[value] & bit) != 0;
2522        return isMirrored((int)c);
2523        // ENd android-changed
2524    }
2525
2526    /**
2527     * Indicates whether the specified code point is mirrored.
2528     *
2529     * @param codePoint
2530     *            the code point to check.
2531     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2532     *         otherwise.
2533     */
2534    public static boolean isMirrored(int codePoint) {
2535        return UCharacter.isMirrored(codePoint);
2536    }
2537
2538    @Override
2539    public int hashCode() {
2540        return value;
2541    }
2542
2543    /**
2544     * Indicates whether the specified character is defined in the Unicode
2545     * specification.
2546     *
2547     * @param c
2548     *            the character to check.
2549     * @return {@code true} if the general Unicode category of the character is
2550     *         not {@code UNASSIGNED}; {@code false} otherwise.
2551     */
2552    public static boolean isDefined(char c) {
2553        // BEGIN android-changed
2554        // return getType(c) != UNASSIGNED;
2555        return UCharacter.isDefined(c);
2556        // END android-changed
2557    }
2558
2559    /**
2560     * Indicates whether the specified code point is defined in the Unicode
2561     * specification.
2562     *
2563     * @param codePoint
2564     *            the code point to check.
2565     * @return {@code true} if the general Unicode category of the code point is
2566     *         not {@code UNASSIGNED}; {@code false} otherwise.
2567     */
2568    public static boolean isDefined(int codePoint) {
2569        return UCharacter.isDefined(codePoint);
2570    }
2571
2572    /**
2573     * Indicates whether the specified character is a digit.
2574     *
2575     * @param c
2576     *            the character to check.
2577     * @return {@code true} if {@code c} is a digit; {@code false}
2578     *         otherwise.
2579     */
2580    public static boolean isDigit(char c) {
2581        // Optimized case for ASCII
2582        if ('0' <= c && c <= '9') {
2583            return true;
2584        }
2585        if (c < 1632) {
2586            return false;
2587        }
2588        // BEGIN android-changed
2589        return UCharacter.isDigit(c);
2590        // END android-changed
2591    }
2592
2593    /**
2594     * Indicates whether the specified code point is a digit.
2595     *
2596     * @param codePoint
2597     *            the code point to check.
2598     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2599     *         otherwise.
2600     */
2601    public static boolean isDigit(int codePoint) {
2602        return UCharacter.isDigit(codePoint);
2603    }
2604
2605    /**
2606     * Indicates whether the specified character is ignorable in a Java or
2607     * Unicode identifier.
2608     *
2609     * @param c
2610     *            the character to check.
2611     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2612     */
2613    public static boolean isIdentifierIgnorable(char c) {
2614        // BEGIN android-changed
2615        // return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b)
2616        //         || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT;
2617        return UCharacter.isIdentifierIgnorable(c);
2618        // END android-changed
2619    }
2620
2621    /**
2622     * Indicates whether the specified code point is ignorable in a Java or
2623     * Unicode identifier.
2624     *
2625     * @param codePoint
2626     *            the code point to check.
2627     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2628     *         otherwise.
2629     */
2630    public static boolean isIdentifierIgnorable(int codePoint) {
2631        return UCharacter.isIdentifierIgnorable(codePoint);
2632    }
2633
2634    /**
2635     * Indicates whether the specified character is an ISO control character.
2636     *
2637     * @param c
2638     *            the character to check.
2639     * @return {@code true} if {@code c} is an ISO control character;
2640     *         {@code false} otherwise.
2641     */
2642    public static boolean isISOControl(char c) {
2643        return isISOControl((int)c);
2644    }
2645
2646    /**
2647     * Indicates whether the specified code point is an ISO control character.
2648     *
2649     * @param c
2650     *            the code point to check.
2651     * @return {@code true} if {@code c} is an ISO control character;
2652     *         {@code false} otherwise.
2653     */
2654    public static boolean isISOControl(int c) {
2655        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2656    }
2657
2658    /**
2659     * Indicates whether the specified character is a valid part of a Java
2660     * identifier other than the first character.
2661     *
2662     * @param c
2663     *            the character to check.
2664     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2665     *         {@code false} otherwise.
2666     */
2667    public static boolean isJavaIdentifierPart(char c) {
2668        // Optimized case for ASCII
2669        if (c < 128) {
2670            return (typeTags[c] & ISJAVAPART) != 0;
2671        }
2672
2673        int type = getType(c);
2674        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2675                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2676                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2677                || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
2678                || (c >= 0x80 && c <= 0x9f) || type == FORMAT;
2679    }
2680
2681    /**
2682     * Indicates whether the specified code point is a valid part of a Java
2683     * identifier other than the first character.
2684     *
2685     * @param codePoint
2686     *            the code point to check.
2687     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2688     *         {@code false} otherwise.
2689     */
2690    public static boolean isJavaIdentifierPart(int codePoint) {
2691        int type = getType(codePoint);
2692        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2693                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2694                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2695                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2696                || isIdentifierIgnorable(codePoint);
2697    }
2698
2699    /**
2700     * Indicates whether the specified character is a valid first character for
2701     * a Java identifier.
2702     *
2703     * @param c
2704     *            the character to check.
2705     * @return {@code true} if {@code c} is a valid first character of a Java
2706     *         identifier; {@code false} otherwise.
2707     */
2708    public static boolean isJavaIdentifierStart(char c) {
2709        // Optimized case for ASCII
2710        if (c < 128) {
2711            return (typeTags[c] & ISJAVASTART) != 0;
2712        }
2713
2714        int type = getType(c);
2715        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2716                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2717                || type == LETTER_NUMBER;
2718    }
2719
2720    /**
2721     * Indicates whether the specified code point is a valid start for a Java
2722     * identifier.
2723     *
2724     * @param codePoint
2725     *            the code point to check.
2726     * @return {@code true} if {@code codePoint} is a valid start of a Java
2727     *         identifier; {@code false} otherwise.
2728     */
2729    public static boolean isJavaIdentifierStart(int codePoint) {
2730        int type = getType(codePoint);
2731        return isLetter(codePoint) || type == CURRENCY_SYMBOL
2732                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2733    }
2734
2735    /**
2736     * Indicates whether the specified character is a Java letter.
2737     *
2738     * @param c
2739     *            the character to check.
2740     * @return {@code true} if {@code c} is a Java letter; {@code false}
2741     *         otherwise.
2742     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2743     */
2744    @Deprecated
2745    public static boolean isJavaLetter(char c) {
2746        return isJavaIdentifierStart(c);
2747    }
2748
2749    /**
2750     * Indicates whether the specified character is a Java letter or digit
2751     * character.
2752     *
2753     * @param c
2754     *            the character to check.
2755     * @return {@code true} if {@code c} is a Java letter or digit;
2756     *         {@code false} otherwise.
2757     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2758     */
2759    @Deprecated
2760    public static boolean isJavaLetterOrDigit(char c) {
2761        return isJavaIdentifierPart(c);
2762    }
2763
2764    /**
2765     * Indicates whether the specified character is a letter.
2766     *
2767     * @param c
2768     *            the character to check.
2769     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2770     */
2771    public static boolean isLetter(char c) {
2772        // BEGIN android-changed
2773        // if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
2774        //     return true;
2775        // }
2776        // if (c < 128) {
2777        //     return false;
2778        // }
2779        // int type = getType(c);
2780        // return type >= UPPERCASE_LETTER && type <= OTHER_LETTER;
2781        return UCharacter.isLetter(c);
2782        // END android-changed
2783    }
2784
2785    /**
2786     * Indicates whether the specified code point is a letter.
2787     *
2788     * @param codePoint
2789     *            the code point to check.
2790     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2791     *         otherwise.
2792     */
2793    public static boolean isLetter(int codePoint) {
2794        return UCharacter.isLetter(codePoint);
2795    }
2796
2797    /**
2798     * Indicates whether the specified character is a letter or a digit.
2799     *
2800     * @param c
2801     *            the character to check.
2802     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2803     *         otherwise.
2804     */
2805    public static boolean isLetterOrDigit(char c) {
2806        // BEGIN android-changed
2807        // int type = getType(c);
2808        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2809        //         || type == DECIMAL_DIGIT_NUMBER;
2810        return UCharacter.isLetterOrDigit(c);
2811        // END andorid-changed
2812    }
2813
2814    /**
2815     * Indicates whether the specified code point is a letter or a digit.
2816     *
2817     * @param codePoint
2818     *            the code point to check.
2819     * @return {@code true} if {@code codePoint} is a letter or a digit;
2820     *         {@code false} otherwise.
2821     */
2822    public static boolean isLetterOrDigit(int codePoint) {
2823        return UCharacter.isLetterOrDigit(codePoint);
2824    }
2825
2826    /**
2827     * Indicates whether the specified character is a lower case letter.
2828     *
2829     * @param c
2830     *            the character to check.
2831     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2832     *         otherwise.
2833     */
2834    public static boolean isLowerCase(char c) {
2835        // BEGIN android-changed
2836        // // Optimized case for ASCII
2837        // if ('a' <= c && c <= 'z') {
2838        //     return true;
2839        // }
2840        // if (c < 128) {
2841        //     return false;
2842        // }
2843        //
2844        // return getType(c) == LOWERCASE_LETTER;
2845        return UCharacter.isLowerCase(c);
2846        // END android-changed
2847    }
2848
2849    /**
2850     * Indicates whether the specified code point is a lower case letter.
2851     *
2852     * @param codePoint
2853     *            the code point to check.
2854     * @return {@code true} if {@code codePoint} is a lower case letter;
2855     *         {@code false} otherwise.
2856     */
2857    public static boolean isLowerCase(int codePoint) {
2858        return UCharacter.isLowerCase(codePoint);
2859    }
2860
2861    /**
2862     * Indicates whether the specified character is a Java space.
2863     *
2864     * @param c
2865     *            the character to check.
2866     * @return {@code true} if {@code c} is a Java space; {@code false}
2867     *         otherwise.
2868     * @deprecated Use {@link #isWhitespace(char)}
2869     */
2870    @Deprecated
2871    public static boolean isSpace(char c) {
2872        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2873    }
2874
2875    /**
2876     * Indicates whether the specified character is a Unicode space character.
2877     * That is, if it is a member of one of the Unicode categories Space
2878     * Separator, Line Separator, or Paragraph Separator.
2879     *
2880     * @param c
2881     *            the character to check.
2882     * @return {@code true} if {@code c} is a Unicode space character,
2883     *         {@code false} otherwise.
2884     */
2885    public static boolean isSpaceChar(char c) {
2886        // BEGIN android-changed
2887        // if (c == 0x20 || c == 0xa0 || c == 0x1680) {
2888        //     return true;
2889        // }
2890        // if (c < 0x2000) {
2891        //     return false;
2892        // }
2893        // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f
2894        //         || c == 0x3000;
2895        return UCharacter.isSpaceChar(c);
2896        // END android-changed
2897    }
2898
2899    /**
2900     * Indicates whether the specified code point is a Unicode space character.
2901     * That is, if it is a member of one of the Unicode categories Space
2902     * Separator, Line Separator, or Paragraph Separator.
2903     *
2904     * @param codePoint
2905     *            the code point to check.
2906     * @return {@code true} if {@code codePoint} is a Unicode space character,
2907     *         {@code false} otherwise.
2908     */
2909    public static boolean isSpaceChar(int codePoint) {
2910        return UCharacter.isSpaceChar(codePoint);
2911    }
2912
2913    /**
2914     * Indicates whether the specified character is a titlecase character.
2915     *
2916     * @param c
2917     *            the character to check.
2918     * @return {@code true} if {@code c} is a titlecase character, {@code false}
2919     *         otherwise.
2920     */
2921    public static boolean isTitleCase(char c) {
2922        // BEGIN android-changed
2923        // if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb' || c == '\u01f2') {
2924        //     return true;
2925        // }
2926        // if (c >= '\u1f88' && c <= '\u1ffc') {
2927        //     // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf
2928        //     if (c > '\u1faf') {
2929        //         return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc';
2930        //     }
2931        //     int last = c & 0xf;
2932        //     return last >= 8 && last <= 0xf;
2933        // }
2934        // return false;
2935        return UCharacter.isTitleCase(c);
2936        // END android-changed
2937    }
2938
2939    /**
2940     * Indicates whether the specified code point is a titlecase character.
2941     *
2942     * @param codePoint
2943     *            the code point to check.
2944     * @return {@code true} if {@code codePoint} is a titlecase character,
2945     *         {@code false} otherwise.
2946     */
2947    public static boolean isTitleCase(int codePoint) {
2948        return UCharacter.isTitleCase(codePoint);
2949    }
2950
2951    /**
2952     * Indicates whether the specified character is valid as part of a Unicode
2953     * identifier other than the first character.
2954     *
2955     * @param c
2956     *            the character to check.
2957     * @return {@code true} if {@code c} is valid as part of a Unicode
2958     *         identifier; {@code false} otherwise.
2959     */
2960    public static boolean isUnicodeIdentifierPart(char c) {
2961        // BEGIN android-changed
2962        // int type = getType(c);
2963        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2964        //         || type == CONNECTOR_PUNCTUATION
2965        //         || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2966        //         || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
2967        //         || isIdentifierIgnorable(c);
2968        return UCharacter.isUnicodeIdentifierPart(c);
2969        // END android-changed
2970    }
2971
2972    /**
2973     * Indicates whether the specified code point is valid as part of a Unicode
2974     * identifier other than the first character.
2975     *
2976     * @param codePoint
2977     *            the code point to check.
2978     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
2979     *         identifier; {@code false} otherwise.
2980     */
2981    public static boolean isUnicodeIdentifierPart(int codePoint) {
2982        return UCharacter.isUnicodeIdentifierPart(codePoint);
2983    }
2984
2985    /**
2986     * Indicates whether the specified character is a valid initial character
2987     * for a Unicode identifier.
2988     *
2989     * @param c
2990     *            the character to check.
2991     * @return {@code true} if {@code c} is a valid first character for a
2992     *         Unicode identifier; {@code false} otherwise.
2993     */
2994    public static boolean isUnicodeIdentifierStart(char c) {
2995        // BEGIN android-changed
2996        // int type = getType(c);
2997        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2998        //         || type == LETTER_NUMBER;
2999        return UCharacter.isUnicodeIdentifierStart(c);
3000        // END android-changed
3001    }
3002
3003    /**
3004     * Indicates whether the specified code point is a valid initial character
3005     * for a Unicode identifier.
3006     *
3007     * @param codePoint
3008     *            the code point to check.
3009     * @return {@code true} if {@code codePoint} is a valid first character for
3010     *         a Unicode identifier; {@code false} otherwise.
3011     */
3012    public static boolean isUnicodeIdentifierStart(int codePoint) {
3013        return UCharacter.isUnicodeIdentifierStart(codePoint);
3014    }
3015
3016    /**
3017     * Indicates whether the specified character is an upper case letter.
3018     *
3019     * @param c
3020     *            the character to check.
3021     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3022     *         otherwise.
3023     */
3024    public static boolean isUpperCase(char c) {
3025        // Optimized case for ASCII
3026        if ('A' <= c && c <= 'Z') {
3027            return true;
3028        }
3029        if (c < 128) {
3030            return false;
3031        }
3032        // BEGIN android-changed
3033        return UCharacter.isUpperCase(c);
3034        // END android-changed
3035    }
3036
3037    /**
3038     * Indicates whether the specified code point is an upper case letter.
3039     *
3040     * @param codePoint
3041     *            the code point to check.
3042     * @return {@code true} if {@code codePoint} is a upper case letter;
3043     *         {@code false} otherwise.
3044     */
3045    public static boolean isUpperCase(int codePoint) {
3046        return UCharacter.isUpperCase(codePoint);
3047    }
3048
3049    /**
3050     * Indicates whether the specified character is a whitespace character in
3051     * Java.
3052     *
3053     * @param c
3054     *            the character to check.
3055     * @return {@code true} if the supplied {@code c} is a whitespace character
3056     *         in Java; {@code false} otherwise.
3057     */
3058    public static boolean isWhitespace(char c) {
3059        // BEGIN android-changed
3060        // // Optimized case for ASCII
3061        // if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) {
3062        //     return true;
3063        // }
3064        // if (c == 0x1680) {
3065        //     return true;
3066        // }
3067        // if (c < 0x2000 || c == 0x2007) {
3068        //     return false;
3069        // }
3070        // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000;
3071        return UCharacter.isWhitespace(c);
3072        // END android-changed
3073    }
3074
3075    /**
3076     * Indicates whether the specified code point is a whitespace character in
3077     * Java.
3078     *
3079     * @param codePoint
3080     *            the code point to check.
3081     * @return {@code true} if the supplied {@code c} is a whitespace character
3082     *         in Java; {@code false} otherwise.
3083     */
3084    public static boolean isWhitespace(int codePoint) {
3085        //FIXME depends on ICU when the codePoint is '\u2007'
3086        return UCharacter.isWhitespace(codePoint);
3087
3088    }
3089
3090    /**
3091     * Reverses the order of the first and second byte in the specified
3092     * character.
3093     *
3094     * @param c
3095     *            the character to reverse.
3096     * @return the character with reordered bytes.
3097     */
3098    public static char reverseBytes(char c) {
3099        return (char)((c<<8) | (c>>8));
3100    }
3101
3102    /**
3103     * Returns the lower case equivalent for the specified character if the
3104     * character is an upper case letter. Otherwise, the specified character is
3105     * returned unchanged.
3106     *
3107     * @param c
3108     *            the character
3109     * @return if {@code c} is an upper case character then its lower case
3110     *         counterpart, otherwise just {@code c}.
3111     */
3112    public static char toLowerCase(char c) {
3113        // BEGIN android-changed
3114        // // Optimized case for ASCII
3115        // if ('A' <= c && c <= 'Z') {
3116        //     return (char) (c + ('a' - 'A'));
3117        // }
3118        // if (c < 192) {// || c == 215 || (c > 222 && c < 256)) {
3119        //     return c;
3120        // }
3121        // if (c<1000) {
3122        //     return (char)lowercaseValuesCache[c-192];
3123        // }
3124        //
3125        // int result = BinarySearch.binarySearchRange(lowercaseKeys, c);
3126        // if (result >= 0) {
3127        //     boolean by2 = false;
3128        //     char start = lowercaseKeys.charAt(result);
3129        //     char end = lowercaseValues[result * 2];
3130        //     if ((start & 0x8000) != (end & 0x8000)) {
3131        //         end ^= 0x8000;
3132        //         by2 = true;
3133        //     }
3134        //     if (c <= end) {
3135        //         if (by2 && (c & 1) != (start & 1)) {
3136        //             return c;
3137        //         }
3138        //         char mapping = lowercaseValues[result * 2 + 1];
3139        //         return (char) (c + mapping);
3140        //     }
3141        // }
3142        // return c;
3143        return (char)UCharacter.toLowerCase(c);
3144        // END android-changed
3145    }
3146
3147    /**
3148     * Returns the lower case equivalent for the specified code point if it is
3149     * an upper case letter. Otherwise, the specified code point is returned
3150     * unchanged.
3151     *
3152     * @param codePoint
3153     *            the code point to check.
3154     * @return if {@code codePoint} is an upper case character then its lower
3155     *         case counterpart, otherwise just {@code codePoint}.
3156     */
3157    public static int toLowerCase(int codePoint) {
3158        return UCharacter.toLowerCase(codePoint);
3159    }
3160
3161    @Override
3162    public String toString() {
3163        return String.valueOf(value);
3164    }
3165
3166    /**
3167     * Converts the specified character to its string representation.
3168     *
3169     * @param value
3170     *            the character to convert.
3171     * @return the character converted to a string.
3172     */
3173    public static String toString(char value) {
3174        return String.valueOf(value);
3175    }
3176
3177    /**
3178     * Returns the title case equivalent for the specified character if it
3179     * exists. Otherwise, the specified character is returned unchanged.
3180     *
3181     * @param c
3182     *            the character to convert.
3183     * @return the title case equivalent of {@code c} if it exists, otherwise
3184     *         {@code c}.
3185     */
3186    public static char toTitleCase(char c) {
3187        // BEGIN android-changed
3188        // if (isTitleCase(c)) {
3189        //     return c;
3190        // }
3191        // int result = BinarySearch.binarySearch(titlecaseKeys, c);
3192        // if (result >= 0) {
3193        //     return titlecaseValues[result];
3194        // }
3195        // return toUpperCase(c);
3196        return (char)UCharacter.toTitleCase(c);
3197        // ENd android-changed
3198    }
3199
3200    /**
3201     * Returns the title case equivalent for the specified code point if it
3202     * exists. Otherwise, the specified code point is returned unchanged.
3203     *
3204     * @param codePoint
3205     *            the code point to convert.
3206     * @return the title case equivalent of {@code codePoint} if it exists,
3207     *         otherwise {@code codePoint}.
3208     */
3209    public static int toTitleCase(int codePoint) {
3210        return UCharacter.toTitleCase(codePoint);
3211    }
3212
3213    /**
3214     * Returns the upper case equivalent for the specified character if the
3215     * character is a lower case letter. Otherwise, the specified character is
3216     * returned unchanged.
3217     *
3218     * @param c
3219     *            the character to convert.
3220     * @return if {@code c} is a lower case character then its upper case
3221     *         counterpart, otherwise just {@code c}.
3222     */
3223    public static char toUpperCase(char c) {
3224        // BEGIN android-changed
3225        // // Optimized case for ASCII
3226        // if ('a' <= c && c <= 'z') {
3227        //     return (char) (c - ('a' - 'A'));
3228        // }
3229        // if (c < 181) {
3230        //     return c;
3231        // }
3232        // if (c<1000) {
3233        //     return (char)uppercaseValuesCache[(int)c-181];
3234        // }
3235        // int result = BinarySearch.binarySearchRange(uppercaseKeys, c);
3236        // if (result >= 0) {
3237        //     boolean by2 = false;
3238        //     char start = uppercaseKeys.charAt(result);
3239        //     char end = uppercaseValues[result * 2];
3240        //     if ((start & 0x8000) != (end & 0x8000)) {
3241        //         end ^= 0x8000;
3242        //         by2 = true;
3243        //     }
3244        //     if (c <= end) {
3245        //         if (by2 && (c & 1) != (start & 1)) {
3246        //             return c;
3247        //         }
3248        //         char mapping = uppercaseValues[result * 2 + 1];
3249        //         return (char) (c + mapping);
3250        //     }
3251        // }
3252        // return c;
3253        return (char)UCharacter.toUpperCase(c);
3254        // END android-changed
3255    }
3256
3257    /**
3258     * Returns the upper case equivalent for the specified code point if the
3259     * code point is a lower case letter. Otherwise, the specified code point is
3260     * returned unchanged.
3261     *
3262     * @param codePoint
3263     *            the code point to convert.
3264     * @return if {@code codePoint} is a lower case character then its upper
3265     *         case counterpart, otherwise just {@code codePoint}.
3266     */
3267    public static int toUpperCase(int codePoint) {
3268        return UCharacter.toUpperCase(codePoint);
3269    }
3270
3271}
3272