Character.java revision 726ac583d69b37db03c6279af5b36df7b837ede1
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.lang;
19
20import java.io.Serializable;
21// BEGIN android-removed
22// import java.util.SortedMap;
23// import java.util.TreeMap;
24//
25// import org.apache.harmony.luni.util.BinarySearch;
26// END android-removed
27
28// BEGIN android-changed
29import com.ibm.icu4jni.lang.UCharacter;
30// END android-changed
31
32/**
33 * The wrapper for the primitive type {@code char}. This class also provides a
34 * number of utility methods for working with characters.
35 * <p>
36 * Character data is based upon the Unicode Standard, 4.0. The Unicode
37 * specification, character tables and other information are available at <a
38 * href="http://www.unicode.org/">http://www.unicode.org/</a>.
39 * <p>
40 * Unicode characters are referred to as <i>code points</i>. The range of valid
41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are
43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
44 * encoding and {@code char} pairs are used to represent code points in the
45 * supplementary range. A pair of {@code char} values that represent a
46 * supplementary character are made up of a <i>high surrogate</i> with a value
47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of
48 * 0xDC00 to 0xDFFF.
49 * <p>
50 * On the Java platform a {@code char} value represents either a single BMP code
51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type
52 * is used to represent all Unicode code points.
53 *
54 * @since 1.0
55 */
56public final class Character implements Serializable, Comparable<Character> {
    /**
     * Stream identifier used by Java serialization for this class.
     */
    private static final long serialVersionUID = 3786198910865385080L;

    /**
     * The primitive {@code char} held by this wrapper instance.
     */
    private final char value;

    /**
     * The minimum {@code Character} value, U+0000.
     */
    public static final char MIN_VALUE = '\u0000';

    /**
     * The maximum {@code Character} value, U+FFFF.
     */
    public static final char MAX_VALUE = '\uffff';

    /**
     * The minimum radix used for conversions between characters and integers
     * (base 2).
     */
    public static final int MIN_RADIX = 2;

    /**
     * The maximum radix used for conversions between characters and integers
     * (base 36).
     */
    public static final int MAX_RADIX = 36;
80
    /**
     * The {@link Class} object that represents the primitive type {@code char}.
     * It is obtained as the component type of {@code char[]}.
     */
    @SuppressWarnings("unchecked")
    public static final Class<Character> TYPE
            = (Class<Character>) char[].class.getComponentType();

    // Note: This can't be set to "char.class", since *that* is
    // defined to be "java.lang.Character.TYPE"; the value is therefore taken
    // from the array class instead. The cast to Class<Character> is unchecked,
    // which is what the @SuppressWarnings annotation above is for.
90
    /**
     * Unicode general category constant {@code Cn} (other, not assigned).
     */
    public static final byte UNASSIGNED = 0;

    /**
     * Unicode general category constant {@code Lu} (letter, uppercase).
     */
    public static final byte UPPERCASE_LETTER = 1;

    /**
     * Unicode general category constant {@code Ll} (letter, lowercase).
     */
    public static final byte LOWERCASE_LETTER = 2;

    /**
     * Unicode general category constant {@code Lt} (letter, titlecase).
     */
    public static final byte TITLECASE_LETTER = 3;

    /**
     * Unicode general category constant {@code Lm} (letter, modifier).
     */
    public static final byte MODIFIER_LETTER = 4;

    /**
     * Unicode general category constant {@code Lo} (letter, other).
     */
    public static final byte OTHER_LETTER = 5;

    /**
     * Unicode general category constant {@code Mn} (mark, non-spacing).
     */
    public static final byte NON_SPACING_MARK = 6;

    /**
     * Unicode general category constant {@code Me} (mark, enclosing).
     */
    public static final byte ENCLOSING_MARK = 7;

    /**
     * Unicode general category constant {@code Mc} (mark, spacing combining).
     */
    public static final byte COMBINING_SPACING_MARK = 8;

    /**
     * Unicode general category constant {@code Nd} (number, decimal digit).
     */
    public static final byte DECIMAL_DIGIT_NUMBER = 9;

    /**
     * Unicode general category constant {@code Nl} (number, letter).
     */
    public static final byte LETTER_NUMBER = 10;

    /**
     * Unicode general category constant {@code No} (number, other).
     */
    public static final byte OTHER_NUMBER = 11;

    /**
     * Unicode general category constant {@code Zs} (separator, space).
     */
    public static final byte SPACE_SEPARATOR = 12;

    /**
     * Unicode general category constant {@code Zl} (separator, line).
     */
    public static final byte LINE_SEPARATOR = 13;

    /**
     * Unicode general category constant {@code Zp} (separator, paragraph).
     */
    public static final byte PARAGRAPH_SEPARATOR = 14;

    /**
     * Unicode general category constant {@code Cc} (other, control).
     */
    public static final byte CONTROL = 15;

    /**
     * Unicode general category constant {@code Cf} (other, format).
     */
    public static final byte FORMAT = 16;

    // Note: the value 17 is not used by any category constant in this list;
    // the numbering continues at 18.

    /**
     * Unicode general category constant {@code Co} (other, private use).
     */
    public static final byte PRIVATE_USE = 18;

    /**
     * Unicode general category constant {@code Cs} (other, surrogate).
     */
    public static final byte SURROGATE = 19;

    /**
     * Unicode general category constant {@code Pd} (punctuation, dash).
     */
    public static final byte DASH_PUNCTUATION = 20;

    /**
     * Unicode general category constant {@code Ps} (punctuation, open).
     */
    public static final byte START_PUNCTUATION = 21;

    /**
     * Unicode general category constant {@code Pe} (punctuation, close).
     */
    public static final byte END_PUNCTUATION = 22;

    /**
     * Unicode general category constant {@code Pc} (punctuation, connector).
     */
    public static final byte CONNECTOR_PUNCTUATION = 23;

    /**
     * Unicode general category constant {@code Po} (punctuation, other).
     */
    public static final byte OTHER_PUNCTUATION = 24;

    /**
     * Unicode general category constant {@code Sm} (symbol, math).
     */
    public static final byte MATH_SYMBOL = 25;

    /**
     * Unicode general category constant {@code Sc} (symbol, currency).
     */
    public static final byte CURRENCY_SYMBOL = 26;

    /**
     * Unicode general category constant {@code Sk} (symbol, modifier).
     */
    public static final byte MODIFIER_SYMBOL = 27;

    /**
     * Unicode general category constant {@code So} (symbol, other).
     */
    public static final byte OTHER_SYMBOL = 28;

    /**
     * Unicode general category constant {@code Pi} (punctuation, initial
     * quote).
     *
     * @since 1.4
     */
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

    /**
     * Unicode general category constant {@code Pf} (punctuation, final quote).
     *
     * @since 1.4
     */
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
244
    /**
     * Unicode bidirectional constant for an undefined directionality. It is
     * the only directionality constant with a negative value.
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_UNDEFINED = -1;

    /**
     * Unicode bidirectional constant {@code L} (left-to-right).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

    /**
     * Unicode bidirectional constant {@code R} (right-to-left).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

    /**
     * Unicode bidirectional constant {@code AL} (right-to-left Arabic).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

    /**
     * Unicode bidirectional constant {@code EN} (European number).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

    /**
     * Unicode bidirectional constant {@code ES} (European number separator).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

    /**
     * Unicode bidirectional constant {@code ET} (European number terminator).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

    /**
     * Unicode bidirectional constant {@code AN} (Arabic number).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

    /**
     * Unicode bidirectional constant {@code CS} (common number separator).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

    /**
     * Unicode bidirectional constant {@code NSM} (non-spacing mark).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

    /**
     * Unicode bidirectional constant {@code BN} (boundary neutral).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

    /**
     * Unicode bidirectional constant {@code B} (paragraph separator).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

    /**
     * Unicode bidirectional constant {@code S} (segment separator).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

    /**
     * Unicode bidirectional constant {@code WS} (whitespace).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_WHITESPACE = 12;

    /**
     * Unicode bidirectional constant {@code ON} (other neutrals).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

    /**
     * Unicode bidirectional constant {@code LRE} (left-to-right embedding).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

    /**
     * Unicode bidirectional constant {@code LRO} (left-to-right override).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

    /**
     * Unicode bidirectional constant {@code RLE} (right-to-left embedding).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

    /**
     * Unicode bidirectional constant {@code RLO} (right-to-left override).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

    /**
     * Unicode bidirectional constant {@code PDF} (pop directional format).
     *
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
384
    /**
     * The minimum value of a high surrogate or leading surrogate unit in UTF-16
     * encoding, {@code '\uD800'}.
     *
     * @since 1.5
     */
    public static final char MIN_HIGH_SURROGATE = '\uD800';

    /**
     * The maximum value of a high surrogate or leading surrogate unit in UTF-16
     * encoding, {@code '\uDBFF'}.
     *
     * @since 1.5
     */
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';

    /**
     * The minimum value of a low surrogate or trailing surrogate unit in UTF-16
     * encoding, {@code '\uDC00'}.
     *
     * @since 1.5
     */
    public static final char MIN_LOW_SURROGATE = '\uDC00';

    /**
     * The maximum value of a low surrogate or trailing surrogate unit in UTF-16
     * encoding, {@code '\uDFFF'}.
     *
     * @since 1.5
     */
    public static final char MAX_LOW_SURROGATE = '\uDFFF';

    /**
     * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}.
     * This is the same value as {@link #MIN_HIGH_SURROGATE}.
     *
     * @since 1.5
     */
    public static final char MIN_SURROGATE = '\uD800';

    /**
     * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}.
     * This is the same value as {@link #MAX_LOW_SURROGATE}.
     *
     * @since 1.5
     */
    public static final char MAX_SURROGATE = '\uDFFF';

    /**
     * The minimum value of a supplementary code point, {@code U+010000}. It is
     * one greater than {@link #MAX_VALUE}.
     *
     * @since 1.5
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;

    /**
     * The minimum code point value, {@code U+0000}.
     *
     * @since 1.5
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum code point value, {@code U+10FFFF}.
     *
     * @since 1.5
     */
    public static final int MAX_CODE_POINT = 0x10FFFF;

    /**
     * The number of bits required to represent a {@code Character} value in
     * unsigned form.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
459
460    // BEGIN android-removed
461    // Unicode 3.0.1 (same as Unicode 3.0.0)
462    // private static final String bidiKeys = ...
463
464    // private static final char[] bidiValues = ...
465
466    // private static final char[] mirrored = ...
467
468    // Unicode 3.0.1 (same as Unicode 3.0.0)
469    // private static final String typeKeys = ...
470
471    // private static final char[] typeValues = ...
472
473    // private static final int[] typeValuesCache = ...
474
475    // Unicode 3.0.1 (same as Unicode 3.0.0)
476    // private static final String uppercaseKeys = ...
477
478    // private static final char[] uppercaseValues = ...
479
480    // private static final int[] uppercaseValuesCache = ...
481
482    // private static final String lowercaseKeys = ...
483
484    // private static final char[] lowercaseValues = ...
485
486    // private static final int[] lowercaseValuesCache = ...
487
488    // private static final String digitKeys = ...
489
490    // private static final char[] digitValues = ...
491    // END android-removed
492
    // BEGIN android-note
    // put this in a helper class so that it's only initialized on demand?
    // END android-note
    // Table of 128 flag values. Every entry is 0, 2 or 3, i.e. a combination
    // of the ISJAVASTART (1) and ISJAVAPART (2) bits. Read as an array indexed
    // by code point, the layout lines up with the ASCII range:
    //   3 (start | part) at '$', '_', 'A'-'Z' and 'a'-'z';
    //   2 (part only)    at '0'-'9' and at the control characters
    //                    U+0000-U+0008, U+000E-U+001B and U+007F;
    //   0                everywhere else.
    // NOTE(review): the code that indexes this table is outside this section;
    // presumably it backs the Java-identifier checks for characters below 128
    // -- confirm before changing any entry.
    // NOTE(review): getValue() is not part of the public String API; presumably
    // a package-private accessor returning the string's char[] -- confirm.
    private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002"
            .getValue();
498
499    // BEGIN android-note
500    // put this in a helper class so that it's only initialized on demand?
501    // END android-note
502    private static final byte[] DIRECTIONALITY = new byte[] {
503            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
504            DIRECTIONALITY_EUROPEAN_NUMBER,
505            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
506            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
507            DIRECTIONALITY_ARABIC_NUMBER,
508            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
509            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
510            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
511            DIRECTIONALITY_OTHER_NEUTRALS,
512            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
513            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
514            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
515            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
516            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
517            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
518            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
519
    // Flag bit with value 1. NOTE(review): by its name, presumably set for
    // characters that may start a Java identifier; the code that tests this
    // flag is outside this section -- confirm.
    private static final int ISJAVASTART = 1;

    // Flag bit with value 2, distinct from ISJAVASTART so that both can be
    // combined in a single value. NOTE(review): by its name, presumably set for
    // characters that may be part of a Java identifier -- confirm against the
    // code that tests it.
    private static final int ISJAVAPART = 2;
523
524    // BEGIN android-removed
525    // Unicode 3.0.1 (same as Unicode 3.0.0)
526    // private static final String titlecaseKeys = ...
527
528    // private static final char[] titlecaseValues = ...
529
530    // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
531    // private static final String numericKeys = ...
532
533    // private static final char[] numericValues = ...
534    // END android-removed
535
536    /*
537     * Represents a subset of the Unicode character set.
538     */
539    public static class Subset {
540        String name;
541
542        /**
543         * Constructs a new {@code Subset}.
544         *
545         * @param string
546         *            this subset's name.
547         */
548        protected Subset(String string) {
549            if (string == null) {
550                throw new NullPointerException();
551            }
552            name = string;
553        }
554
555        /**
556         * Compares this character subset with the specified object. Uses
557         * {@link java.lang.Object#equals(Object)} to do the comparison.
558         *
559         * @param object
560         *            the object to compare this character subset with.
561         * @return {@code true} if {@code object} is this subset, that is, if
562         *         {@code object == this}; {@code false} otherwise.
563         */
564        @Override
565        public final boolean equals(Object object) {
566            return super.equals(object);
567        }
568
569        /**
570         * Returns the integer hash code for this character subset.
571         *
572         * @return this subset's hash code, which is the hash code computed by
573         *         {@link java.lang.Object#hashCode()}.
574         */
575        @Override
576        public final int hashCode() {
577            return super.hashCode();
578        }
579
580        /**
581         * Returns the string representation of this subset.
582         *
583         * @return this subset's name.
584         */
585        @Override
586        public final String toString() {
587            return name;
588        }
589    }
590
591    /**
592     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
593     * specification.
594     *
595     * @since 1.2
596     */
597    public static final class UnicodeBlock extends Subset {
598        /**
599         * The &quot;Surrogates Area&quot; Unicode Block.
600         *
601         * @deprecated As of Java 5, this block has been replaced by
602         *             {@link #HIGH_SURROGATES},
603         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
604         *             {@link #LOW_SURROGATES}.
605         */
606        @Deprecated
607        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
608        /**
609         * The &quot;Basic Latin&quot; Unicode Block.
610         *
611         * @since 1.2
612         */
613        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
614        /**
615         * The &quot;Latin-1 Supplement&quot; Unicode Block.
616         *
617         * @since 1.2
618         */
619        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
620        /**
621         * The &quot;Latin Extended-A&quot; Unicode Block.
622         *
623         * @since 1.2
624         */
625        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
626        /**
627         * The &quot;Latin Extended-B&quot; Unicode Block.
628         *
629         * @since 1.2
630         */
631        public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f);
632        /**
633         * The &quot;IPA Extensions&quot; Unicode Block.
634         *
635         * @since 1.2
636         */
637        public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af);
638        /**
639         * The &quot;Spacing Modifier Letters&quot; Unicode Block.
640         *
641         * @since 1.2
642         */
643        public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
644        /**
645         * The &quot;Combining Diacritical Marks&quot; Unicode Block.
646         *
647         * @since 1.2
648         */
649        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
650        /**
651         * The &quot;Greek and Coptic&quot; Unicode Block. Previously referred
652         * to as &quot;Greek&quot;.
653         *
654         * @since 1.2
655         */
656        public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff);
657        /**
658         * The &quot;Cyrillic&quot; Unicode Block.
659         *
660         * @since 1.2
661         */
662        public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff);
663        /**
664         * The &quot;Cyrillic Supplement&quot; Unicode Block. Previously
665         * referred to as &quot;Cyrillic Supplementary&quot;.
666         *
667         * @since 1.5
668         */
669        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
670        /**
671         * The &quot;Armenian&quot; Unicode Block.
672         *
673         * @since 1.2
674         */
675        public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f);
676        /**
677         * The &quot;Hebrew&quot; Unicode Block.
678         *
679         * @since 1.2
680         */
681        public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff);
682        /**
683         * The &quot;Arabic&quot; Unicode Block.
684         *
685         * @since 1.2
686         */
687        public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff);
688        /**
689         * The &quot;Syriac&quot; Unicode Block.
690         *
691         * @since 1.4
692         */
693        public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f);
694        /**
695         * The &quot;Thaana&quot; Unicode Block.
696         *
697         * @since 1.4
698         */
699        public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf);
700        /**
701         * The &quot;Devanagari&quot; Unicode Block.
702         *
703         * @since 1.2
704         */
705        public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f);
706        /**
707         * The &quot;Bengali&quot; Unicode Block.
708         *
709         * @since 1.2
710         */
711        public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff);
712        /**
713         * The &quot;Gurmukhi&quot; Unicode Block.
714         *
715         * @since 1.2
716         */
717        public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f);
718        /**
719         * The &quot;Gujarati&quot; Unicode Block.
720         *
721         * @since 1.2
722         */
723        public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff);
724        /**
725         * The &quot;Oriya&quot; Unicode Block.
726         *
727         * @since 1.2
728         */
729        public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f);
730        /**
731         * The &quot;Tamil&quot; Unicode Block.
732         *
733         * @since 1.2
734         */
735        public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff);
736        /**
737         * The &quot;Telugu&quot; Unicode Block.
738         *
739         * @since 1.2
740         */
741        public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f);
742        /**
743         * The &quot;Kannada&quot; Unicode Block.
744         *
745         * @since 1.2
746         */
747        public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff);
748        /**
749         * The &quot;Malayalam&quot; Unicode Block.
750         *
751         * @since 1.2
752         */
753        public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f);
754        /**
755         * The &quot;Sinhala&quot; Unicode Block.
756         *
757         * @since 1.4
758         */
759        public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff);
760        /**
761         * The &quot;Thai&quot; Unicode Block.
762         *
763         * @since 1.2
764         */
765        public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f);
766        /**
767         * The &quot;Lao&quot; Unicode Block.
768         *
769         * @since 1.2
770         */
771        public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff);
772        /**
773         * The &quot;Tibetan&quot; Unicode Block.
774         *
775         * @since 1.2
776         */
777        public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff);
778        /**
779         * The &quot;Myanmar&quot; Unicode Block.
780         *
781         * @since 1.4
782         */
783        public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f);
784        /**
785         * The &quot;Georgian&quot; Unicode Block.
786         *
787         * @since 1.2
788         */
789        public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff);
790        /**
791         * The &quot;Hangul Jamo&quot; Unicode Block.
792         *
793         * @since 1.2
794         */
795        public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff);
796        /**
797         * The &quot;Ethiopic&quot; Unicode Block.
798         *
799         * @since 1.4
800         */
801        public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f);
802        /**
803         * The &quot;Cherokee&quot; Unicode Block.
804         *
805         * @since 1.4
806         */
807        public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff);
808        /**
809         * The &quot;Unified Canadian Aboriginal Syllabics&quot; Unicode Block.
810         *
811         * @since 1.4
812         */
813        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
814        /**
815         * The &quot;Ogham&quot; Unicode Block.
816         *
817         * @since 1.4
818         */
819        public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f);
820        /**
821         * The &quot;Runic&quot; Unicode Block.
822         *
823         * @since 1.4
824         */
825        public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff);
826        /**
827         * The &quot;Tagalog&quot; Unicode Block.
828         *
829         * @since 1.5
830         */
831        public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f);
832        /**
833         * The &quot;Hanunoo&quot; Unicode Block.
834         *
835         * @since 1.5
836         */
837        public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f);
838        /**
839         * The &quot;Buhid&quot; Unicode Block.
840         *
841         * @since 1.5
842         */
843        public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f);
844        /**
845         * The &quot;Tagbanwa&quot; Unicode Block.
846         *
847         * @since 1.5
848         */
849        public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f);
850        /**
851         * The &quot;Khmer&quot; Unicode Block.
852         *
853         * @since 1.4
854         */
855        public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff);
856        /**
857         * The &quot;Mongolian&quot; Unicode Block.
858         *
859         * @since 1.4
860         */
861        public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af);
862        /**
863         * The &quot;Limbu&quot; Unicode Block.
864         *
865         * @since 1.5
866         */
867        public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f);
868        /**
869         * The &quot;Tai Le&quot; Unicode Block.
870         *
871         * @since 1.5
872         */
873        public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f);
874        /**
875         * The &quot;Khmer Symbols&quot; Unicode Block.
876         *
877         * @since 1.5
878         */
879        public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff);
880        /**
881         * The &quot;Phonetic Extensions&quot; Unicode Block.
882         *
883         * @since 1.5
884         */
885        public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
886        /**
887         * The &quot;Latin Extended Additional&quot; Unicode Block.
888         *
889         * @since 1.2
890         */
891        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
892        /**
893         * The &quot;Greek Extended&quot; Unicode Block.
894         *
895         * @since 1.2
896         */
897        public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff);
898        /**
899         * The &quot;General Punctuation&quot; Unicode Block.
900         *
901         * @since 1.2
902         */
903        public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f);
904        /**
905         * The &quot;Superscripts and Subscripts&quot; Unicode Block.
906         *
907         * @since 1.2
908         */
909        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
910        /**
911         * The &quot;Currency Symbols&quot; Unicode Block.
912         *
913         * @since 1.2
914         */
915        public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
916        /**
917         * The &quot;Combining Diacritical Marks for Symbols&quot; Unicode
918         * Block. Previously referred to as &quot;Combining Marks for
919         * Symbols&quot;.
920         *
921         * @since 1.2
922         */
923        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
924        /**
925         * The &quot;Letterlike Symbols&quot; Unicode Block.
926         *
927         * @since 1.2
928         */
929        public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
930        /**
931         * The &quot;Number Forms&quot; Unicode Block.
932         *
933         * @since 1.2
934         */
935        public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f);
936        /**
937         * The &quot;Arrows&quot; Unicode Block.
938         *
939         * @since 1.2
940         */
941        public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff);
942        /**
943         * The &quot;Mathematical Operators&quot; Unicode Block.
944         *
945         * @since 1.2
946         */
947        public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
948        /**
949         * The &quot;Miscellaneous Technical&quot; Unicode Block.
950         *
951         * @since 1.2
952         */
953        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
954        /**
955         * The &quot;Control Pictures&quot; Unicode Block.
956         *
957         * @since 1.2
958         */
959        public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f);
960        /**
961         * The &quot;Optical Character Recognition&quot; Unicode Block.
962         *
963         * @since 1.2
964         */
965        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
966        /**
967         * The &quot;Enclosed Alphanumerics&quot; Unicode Block.
968         *
969         * @since 1.2
970         */
971        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
972        /**
973         * The &quot;Box Drawing&quot; Unicode Block.
974         *
975         * @since 1.2
976         */
977        public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f);
978        /**
979         * The &quot;Block Elements&quot; Unicode Block.
980         *
981         * @since 1.2
982         */
983        public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f);
984        /**
985         * The &quot;Geometric Shapes&quot; Unicode Block.
986         *
987         * @since 1.2
988         */
989        public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
990        /**
991         * The &quot;Miscellaneous Symbols&quot; Unicode Block.
992         *
993         * @since 1.2
994         */
995        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
996        /**
997         * The &quot;Dingbats&quot; Unicode Block.
998         *
999         * @since 1.2
1000         */
1001        public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf);
1002        /**
1003         * The &quot;Miscellaneous Mathematical Symbols-A&quot; Unicode Block.
1004         *
1005         * @since 1.5
1006         */
1007        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
1008        /**
1009         * The &quot;Supplemental Arrows-A&quot; Unicode Block.
1010         *
1011         * @since 1.5
1012         */
1013        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
1014        /**
1015         * The &quot;Braille Patterns&quot; Unicode Block.
1016         *
1017         * @since 1.4
1018         */
1019        public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
1020        /**
1021         * The &quot;Supplemental Arrows-B&quot; Unicode Block.
1022         *
1023         * @since 1.5
1024         */
1025        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1026        /**
1027         * The &quot;Miscellaneous Mathematical Symbols-B&quot; Unicode Block.
1028         *
1029         * @since 1.5
1030         */
1031        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1032        /**
1033         * The &quot;Supplemental Mathematical Operators&quot; Unicode Block.
1034         *
1035         * @since 1.5
1036         */
1037        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
1038        /**
1039         * The &quot;Miscellaneous Symbols and Arrows&quot; Unicode Block.
1040         *
1041         * @since 1.5
1042         */
1043        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1044        /**
1045         * The &quot;CJK Radicals Supplement&quot; Unicode Block.
1046         *
1047         * @since 1.4
1048         */
1049        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1050        /**
1051         * The &quot;Kangxi Radicals&quot; Unicode Block.
1052         *
1053         * @since 1.4
1054         */
1055        public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
1056        /**
1057         * The &quot;Ideographic Description Characters&quot; Unicode Block.
1058         *
1059         * @since 1.4
1060         */
1061        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1062        /**
1063         * The &quot;CJK Symbols and Punctuation&quot; Unicode Block.
1064         *
1065         * @since 1.2
1066         */
1067        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1068        /**
1069         * The &quot;Hiragana&quot; Unicode Block.
1070         *
1071         * @since 1.2
1072         */
1073        public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
1074        /**
1075         * The &quot;Katakana&quot; Unicode Block.
1076         *
1077         * @since 1.2
1078         */
1079        public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
1080        /**
1081         * The &quot;Bopomofo&quot; Unicode Block.
1082         *
1083         * @since 1.2
1084         */
1085        public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
1086        /**
1087         * The &quot;Hangul Compatibility Jamo&quot; Unicode Block.
1088         *
1089         * @since 1.2
1090         */
1091        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1092        /**
1093         * The &quot;Kanbun&quot; Unicode Block.
1094         *
1095         * @since 1.2
1096         */
1097        public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
1098        /**
1099         * The &quot;Bopomofo Extended&quot; Unicode Block.
1100         *
1101         * @since 1.4
1102         */
1103        public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1104        /**
1105         * The &quot;Katakana Phonetic Extensions&quot; Unicode Block.
1106         *
1107         * @since 1.5
1108         */
1109        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1110        /**
1111         * The &quot;Enclosed CJK Letters and Months&quot; Unicode Block.
1112         *
1113         * @since 1.2
1114         */
1115        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1116        /**
1117         * The &quot;CJK Compatibility&quot; Unicode Block.
1118         *
1119         * @since 1.2
1120         */
1121        public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
1122        /**
1123         * The &quot;CJK Unified Ideographs Extension A&quot; Unicode Block.
1124         *
1125         * @since 1.4
1126         */
1127        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1128        /**
1129         * The &quot;Yijing Hexagram Symbols&quot; Unicode Block.
1130         *
1131         * @since 1.5
1132         */
1133        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1134        /**
1135         * The &quot;CJK Unified Ideographs&quot; Unicode Block.
1136         *
1137         * @since 1.2
1138         */
1139        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1140        /**
1141         * The &quot;Yi Syllables&quot; Unicode Block.
1142         *
1143         * @since 1.4
1144         */
1145        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
1146        /**
1147         * The &quot;Yi Radicals&quot; Unicode Block.
1148         *
1149         * @since 1.4
1150         */
1151        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
1152        /**
1153         * The &quot;Hangul Syllables&quot; Unicode Block.
1154         *
1155         * @since 1.2
1156         */
1157        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
1158        /**
1159         * The &quot;High Surrogates&quot; Unicode Block. This block represents
1160         * code point values in the high surrogate range 0xD800 to 0xDB7F.
1161         */
1162        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
1163        /**
1164         * The &quot;High Private Use Surrogates&quot; Unicode Block. This block
1165         * represents code point values in the high surrogate range 0xDB80 to
1166         * 0xDBFF.
1167         */
1168        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
1169        /**
1170         * The &quot;Low Surrogates&quot; Unicode Block. This block represents
1171         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
1172         */
1173        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
1174        /**
1175         * The &quot;Private Use Area&quot; Unicode Block.
1176         *
1177         * @since 1.2
1178         */
1179        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1180        /**
1181         * The &quot;CJK Compatibility Ideographs&quot; Unicode Block.
1182         *
1183         * @since 1.2
1184         */
1185        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1186        /**
1187         * The &quot;Alphabetic Presentation Forms&quot; Unicode Block.
1188         *
1189         * @since 1.2
1190         */
1191        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1192        /**
1193         * The &quot;Arabic Presentation Forms-A&quot; Unicode Block.
1194         *
1195         * @since 1.2
1196         */
1197        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1198        /**
1199         * The &quot;Variation Selectors&quot; Unicode Block.
1200         *
1201         * @since 1.5
1202         */
1203        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1204        /**
1205         * The &quot;Combining Half Marks&quot; Unicode Block.
1206         *
1207         * @since 1.2
1208         */
1209        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1210        /**
1211         * The &quot;CJK Compatibility Forms&quot; Unicode Block.
1212         *
1213         * @since 1.2
1214         */
1215        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1216        /**
1217         * The &quot;Small Form Variants&quot; Unicode Block.
1218         *
1219         * @since 1.2
1220         */
1221        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1222        /**
1223         * The &quot;Arabic Presentation Forms-B&quot; Unicode Block.
1224         *
1225         * @since 1.2
1226         */
1227        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1228        /**
1229         * The &quot;Halfwidth and Fullwidth Forms&quot; Unicode Block.
1230         *
1231         * @since 1.2
1232         */
1233        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1234        /**
1235         * The &quot;Specials&quot; Unicode Block.
1236         *
1237         * @since 1.2
1238         */
1239        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
1240        /**
1241         * The &quot;Linear B Syllabary&quot; Unicode Block.
1242         *
1243         * @since 1.5
1244         */
1245        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1246        /**
1247         * The &quot;Linear B Ideograms&quot; Unicode Block.
1248         *
1249         * @since 1.5
1250         */
1251        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1252        /**
1253         * The &quot;Aegean Numbers&quot; Unicode Block.
1254         *
1255         * @since 1.5
1256         */
1257        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
1258        /**
1259         * The &quot;Old Italic&quot; Unicode Block.
1260         *
1261         * @since 1.5
1262         */
1263        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
1264        /**
1265         * The &quot;Gothic&quot; Unicode Block.
1266         *
1267         * @since 1.5
1268         */
1269        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
1270        /**
1271         * The &quot;Ugaritic&quot; Unicode Block.
1272         *
1273         * @since 1.5
1274         */
1275        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
1276        /**
1277         * The &quot;Deseret&quot; Unicode Block.
1278         *
1279         * @since 1.5
1280         */
1281        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
1282        /**
1283         * The &quot;Shavian&quot; Unicode Block.
1284         *
1285         * @since 1.5
1286         */
1287        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
1288        /**
1289         * The &quot;Osmanya&quot; Unicode Block.
1290         *
1291         * @since 1.5
1292         */
1293        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
1294        /**
1295         * The &quot;Cypriot Syllabary&quot; Unicode Block.
1296         *
1297         * @since 1.5
1298         */
1299        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1300        /**
1301         * The &quot;Byzantine Musical Symbols&quot; Unicode Block.
1302         *
1303         * @since 1.5
1304         */
1305        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1306        /**
1307         * The &quot;Musical Symbols&quot; Unicode Block.
1308         *
1309         * @since 1.5
1310         */
1311        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1312        /**
1313         * The &quot;Tai Xuan Jing Symbols&quot; Unicode Block.
1314         *
1315         * @since 1.5
1316         */
1317        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1318        /**
1319         * The &quot;Mathematical Alphanumeric Symbols&quot; Unicode Block.
1320         *
1321         * @since 1.5
1322         */
1323        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1324        /**
1325         * The &quot;CJK Unified Ideographs Extension B&quot; Unicode Block.
1326         *
1327         * @since 1.5
1328         */
1329        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1330        /**
1331         * The &quot;CJK Compatibility Ideographs Supplement&quot; Unicode Block.
1332         *
1333         * @since 1.5
1334         */
1335        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
1336        /**
1337         * The &quot;Tags&quot; Unicode Block.
1338         *
1339         * @since 1.5
1340         */
1341        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
1342        /**
1343         * The &quot;Variation Selectors Supplement&quot; Unicode Block.
1344         *
1345         * @since 1.5
1346         */
1347        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1348        /**
1349         * The &quot;Supplementary Private Use Area-A&quot; Unicode Block.
1350         *
1351         * @since 1.5
1352         */
1353        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1354        /**
1355         * The &quot;Supplementary Private Use Area-B&quot; Unicode Block.
1356         *
1357         * @since 1.5
1358         */
1359        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1360
1361        /*
1362         * All of the UnicodeBlocks with valid ranges in ascending order.
1363         */
1364        private static UnicodeBlock[] BLOCKS;
1365
1366        // BEGIN android-changed
1367        // /*
1368        //  * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents
1369        //  * valid block names and values of the UnicodeBlock constant they map
1370        //  * to.
1371        //  */
1372        // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...;
1373        // END android-changed
1374
1375        /**
1376         * Retrieves the constant that corresponds to the specified block name.
1377         * The block names are defined by the Unicode 4.0.1 specification in the
1378         * {@code Blocks-4.0.1.txt} file.
1379         * <p>
1380         * Block names may be one of the following:
1381         * <ul>
1382         * <li>Canonical block name, as defined by the Unicode specification;
1383         * case-insensitive.</li>
1384         * <li>Canonical block name without any spaces, as defined by the
1385         * Unicode specification; case-insensitive.</li>
1386         * <li>{@code UnicodeBlock} constant identifier. This is determined by
1387         * uppercasing the canonical name and replacing all spaces and hyphens
1388         * with underscores.</li>
1389         * </ul>
1390         *
1391         * @param blockName
1392         *            the name of the block to retrieve.
1393         * @return the UnicodeBlock constant corresponding to {@code blockName}.
1394         * @throws NullPointerException
1395         *             if {@code blockName} is {@code null}.
1396         * @throws IllegalArgumentException
1397         *             if {@code blockName} is not a valid block name.
1398         * @since 1.5
1399         */
1400        public static final UnicodeBlock forName(String blockName) {
1401            // BEGIN android-note
1402            // trying to get closer to the RI which defines this as final.
1403            // END android-note
1404            if (blockName == null) {
1405                throw new NullPointerException();
1406            }
1407            // BEGIN android-changed
1408            if (BLOCKS == null) {
1409                BLOCKS = UCharacter.getBlockTable();
1410            }
1411            int block = UCharacter.forName(blockName);
1412            if (block == -1) {
1413                if(blockName.equals("SURROGATES_AREA")) {
1414                    return SURROGATES_AREA;
1415                } else if(blockName.equalsIgnoreCase("greek")) {
1416                    return GREEK;
1417                } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") ||
1418                        blockName.equals("Combining Marks for Symbols") ||
1419                        blockName.equals("CombiningMarksforSymbols")) {
1420                    return COMBINING_MARKS_FOR_SYMBOLS;
1421                }
1422                throw new IllegalArgumentException();
1423            }
1424            return BLOCKS[block];
1425            // END android-changed
1426        }
1427
1428        /**
1429         * Gets the constant for the Unicode block that contains the specified
1430         * character.
1431         *
1432         * @param c
1433         *            the character for which to get the {@code UnicodeBlock}
1434         *            constant.
1435         * @return the {@code UnicodeBlock} constant for the block that contains
1436         *         {@code c}, or {@code null} if {@code c} does not belong to
1437         *         any defined block.
1438         */
1439        public static UnicodeBlock of(char c) {
1440            return of((int) c);
1441        }
1442
1443        /**
1444         * Gets the constant for the Unicode block that contains the specified
1445         * Unicode code point.
1446         *
1447         * @param codePoint
1448         *            the Unicode code point for which to get the
1449         *            {@code UnicodeBlock} constant.
1450         * @return the {@code UnicodeBlock} constant for the block that contains
1451         *         {@code codePoint}, or {@code null} if {@code codePoint} does
1452         *         not belong to any defined block.
1453         * @throws IllegalArgumentException
1454         *             if {@code codePoint} is not a valid Unicode code point.
1455         * @since 1.5
1456         */
1457        public static UnicodeBlock of(int codePoint) {
1458            if (!isValidCodePoint(codePoint)) {
1459                throw new IllegalArgumentException();
1460            }
1461            // BEGIN android-changed
1462            if (BLOCKS == null) {
1463                BLOCKS = UCharacter.getBlockTable();
1464            }
1465            int block = UCharacter.of(codePoint);
1466            if(block == -1 || block >= BLOCKS.length) {
1467                return null;
1468            }
1469            return BLOCKS[block];
1470            // END android-changed
1471        }
1472
        // BEGIN android-changed
        /**
         * Creates a block constant with the given name.
         * <p>
         * Only {@code blockName} is used: it is passed to the superclass
         * constructor. {@code start} and {@code end} are accepted so the
         * constant declarations can state their range, but this constructor
         * does not store them.
         *
         * @param blockName
         *            the identifier of the block constant.
         * @param start
         *            the first code point of the block (unused here).
         * @param end
         *            the last code point of the block (unused here).
         */
        private UnicodeBlock(String blockName, int start, int end) {
            super(blockName);
        }
        // END android-changed
1478    }
1479
    /**
     * Constructs a new {@code Character} with the specified primitive char
     * value. Each call creates a distinct instance; {@link #valueOf(char)}
     * can return a cached instance instead.
     *
     * @param value
     *            the primitive char value to store in the new instance.
     * @see #valueOf(char)
     */
    public Character(char value) {
        this.value = value;
    }
1490
    /**
     * Gets the primitive {@code char} value held by this {@code Character}.
     *
     * @return this object's primitive value.
     */
    public char charValue() {
        return value;
    }
1499
1500    /**
1501     * Compares this object to the specified character object to determine their
1502     * relative order.
1503     *
1504     * @param c
1505     *            the character object to compare this object to.
1506     * @return {@code 0} if the value of this character and the value of
1507     *         {@code c} are equal; a positive value if the value of this
1508     *         character is greater than the value of {@code c}; a negative
1509     *         value if the value of this character is less than the value of
1510     *         {@code c}.
1511     * @see java.lang.Comparable
1512     * @since 1.2
1513     */
1514    public int compareTo(Character c) {
1515        return value - c.value;
1516    }
1517
1518    /**
1519     * Returns a {@code Character} instance for the {@code char} value passed.
1520     * <p>
1521     * If it is not necessary to get a new {@code Character} instance, it is
1522     * recommended to use this method instead of the constructor, since it
1523     * maintains a cache of instances which may result in better performance.
1524     *
1525     * @param c
1526     *            the char value for which to get a {@code Character} instance.
1527     * @return the {@code Character} instance for {@code c}.
1528     * @since 1.5
1529     */
1530    public static Character valueOf(char c) {
1531        return c < 128 ? SMALL_VALUES[c] : new Character(c);
1532    }
1533
1534    /**
1535     * A cache of instances used by {@link #valueOf(char)} and auto-boxing
1536     */
1537    private static final Character[] SMALL_VALUES = new Character[128];
1538
1539    static {
1540        for(int i = 0; i < 128; i++) {
1541            SMALL_VALUES[i] = new Character((char) i);
1542        }
1543    }
1544    /**
1545     * Indicates whether {@code codePoint} is a valid Unicode code point.
1546     *
1547     * @param codePoint
1548     *            the code point to test.
1549     * @return {@code true} if {@code codePoint} is a valid Unicode code point;
1550     *         {@code false} otherwise.
1551     * @since 1.5
1552     */
1553    public static boolean isValidCodePoint(int codePoint) {
1554        return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1555    }
1556
1557    /**
1558     * Indicates whether {@code codePoint} is within the supplementary code
1559     * point range.
1560     *
1561     * @param codePoint
1562     *            the code point to test.
1563     * @return {@code true} if {@code codePoint} is within the supplementary
1564     *         code point range; {@code false} otherwise.
1565     * @since 1.5
1566     */
1567    public static boolean isSupplementaryCodePoint(int codePoint) {
1568        return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
1569    }
1570
1571    /**
1572     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
1573     * that is used for representing supplementary characters in UTF-16
1574     * encoding.
1575     *
1576     * @param ch
1577     *            the character to test.
1578     * @return {@code true} if {@code ch} is a high-surrogate code unit;
1579     *         {@code false} otherwise.
1580     * @see #isLowSurrogate(char)
1581     * @since 1.5
1582     */
1583    public static boolean isHighSurrogate(char ch) {
1584        return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
1585    }
1586
1587    /**
1588     * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit
1589     * that is used for representing supplementary characters in UTF-16
1590     * encoding.
1591     *
1592     * @param ch
1593     *            the character to test.
1594     * @return {@code true} if {@code ch} is a low-surrogate code unit;
1595     *         {@code false} otherwise.
1596     * @see #isHighSurrogate(char)
1597     * @since 1.5
1598     */
1599    public static boolean isLowSurrogate(char ch) {
1600        return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
1601    }
1602
1603    /**
1604     * Indicates whether the specified character pair is a valid surrogate pair.
1605     *
1606     * @param high
1607     *            the high surrogate unit to test.
1608     * @param low
1609     *            the low surrogate unit to test.
1610     * @return {@code true} if {@code high} is a high-surrogate code unit and
1611     *         {@code low} is a low-surrogate code unit; {@code false}
1612     *         otherwise.
1613     * @see #isHighSurrogate(char)
1614     * @see #isLowSurrogate(char)
1615     * @since 1.5
1616     */
1617    public static boolean isSurrogatePair(char high, char low) {
1618        return (isHighSurrogate(high) && isLowSurrogate(low));
1619    }
1620
1621    /**
1622     * Calculates the number of {@code char} values required to represent the
1623     * specified Unicode code point. This method checks if the {@code codePoint}
1624     * is greater than or equal to {@code 0x10000}, in which case {@code 2} is
1625     * returned, otherwise {@code 1}. To test if the code point is valid, use
1626     * the {@link #isValidCodePoint(int)} method.
1627     *
1628     * @param codePoint
1629     *            the code point for which to calculate the number of required
1630     *            chars.
1631     * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise.
1632     * @see #isValidCodePoint(int)
1633     * @see #isSupplementaryCodePoint(int)
1634     * @since 1.5
1635     */
1636    public static int charCount(int codePoint) {
1637        return (codePoint >= 0x10000 ? 2 : 1);
1638    }
1639
1640    /**
1641     * Converts a surrogate pair into a Unicode code point. This method assumes
1642     * that the pair are valid surrogates. If the pair are <i>not</i> valid
1643     * surrogates, then the result is indeterminate. The
1644     * {@link #isSurrogatePair(char, char)} method should be used prior to this
1645     * method to validate the pair.
1646     *
1647     * @param high
1648     *            the high surrogate unit.
1649     * @param low
1650     *            the low surrogate unit.
1651     * @return the Unicode code point corresponding to the surrogate unit pair.
1652     * @see #isSurrogatePair(char, char)
1653     * @since 1.5
1654     */
1655    public static int toCodePoint(char high, char low) {
1656        // See RFC 2781, Section 2.2
1657        // http://www.faqs.org/rfcs/rfc2781.html
1658        int h = (high & 0x3FF) << 10;
1659        int l = low & 0x3FF;
1660        return (h | l) + 0x10000;
1661    }
1662
1663    /**
1664     * Returns the code point at {@code index} in the specified sequence of
1665     * character units. If the unit at {@code index} is a high-surrogate unit,
1666     * {@code index + 1} is less than the length of the sequence and the unit at
1667     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1668     * point represented by the pair is returned; otherwise the {@code char}
1669     * value at {@code index} is returned.
1670     *
1671     * @param seq
1672     *            the source sequence of {@code char} units.
1673     * @param index
1674     *            the position in {@code seq} from which to retrieve the code
1675     *            point.
1676     * @return the Unicode code point or {@code char} value at {@code index} in
1677     *         {@code seq}.
1678     * @throws NullPointerException
1679     *             if {@code seq} is {@code null}.
1680     * @throws IndexOutOfBoundsException
1681     *             if the {@code index} is negative or greater than or equal to
1682     *             the length of {@code seq}.
1683     * @since 1.5
1684     */
1685    public static int codePointAt(CharSequence seq, int index) {
1686        if (seq == null) {
1687            throw new NullPointerException();
1688        }
1689        int len = seq.length();
1690        if (index < 0 || index >= len) {
1691            throw new IndexOutOfBoundsException();
1692        }
1693
1694        char high = seq.charAt(index++);
1695        if (index >= len) {
1696            return high;
1697        }
1698        char low = seq.charAt(index);
1699        if (isSurrogatePair(high, low)) {
1700            return toCodePoint(high, low);
1701        }
1702        return high;
1703    }
1704
1705    /**
1706     * Returns the code point at {@code index} in the specified array of
1707     * character units. If the unit at {@code index} is a high-surrogate unit,
1708     * {@code index + 1} is less than the length of the array and the unit at
1709     * {@code index + 1} is a low-surrogate unit, then the supplementary code
1710     * point represented by the pair is returned; otherwise the {@code char}
1711     * value at {@code index} is returned.
1712     *
1713     * @param seq
1714     *            the source array of {@code char} units.
1715     * @param index
1716     *            the position in {@code seq} from which to retrieve the code
1717     *            point.
1718     * @return the Unicode code point or {@code char} value at {@code index} in
1719     *         {@code seq}.
1720     * @throws NullPointerException
1721     *             if {@code seq} is {@code null}.
1722     * @throws IndexOutOfBoundsException
1723     *             if the {@code index} is negative or greater than or equal to
1724     *             the length of {@code seq}.
1725     * @since 1.5
1726     */
1727    public static int codePointAt(char[] seq, int index) {
1728        if (seq == null) {
1729            throw new NullPointerException();
1730        }
1731        int len = seq.length;
1732        if (index < 0 || index >= len) {
1733            throw new IndexOutOfBoundsException();
1734        }
1735
1736        char high = seq[index++];
1737        if (index >= len) {
1738            return high;
1739        }
1740        char low = seq[index];
1741        if (isSurrogatePair(high, low)) {
1742            return toCodePoint(high, low);
1743        }
1744        return high;
1745    }
1746
1747    /**
1748     * Returns the code point at {@code index} in the specified array of
1749     * character units, where {@code index} has to be less than {@code limit}.
1750     * If the unit at {@code index} is a high-surrogate unit, {@code index + 1}
1751     * is less than {@code limit} and the unit at {@code index + 1} is a
1752     * low-surrogate unit, then the supplementary code point represented by the
1753     * pair is returned; otherwise the {@code char} value at {@code index} is
1754     * returned.
1755     *
1756     * @param seq
1757     *            the source array of {@code char} units.
1758     * @param index
1759     *            the position in {@code seq} from which to get the code point.
1760     * @param limit
1761     *            the index after the last unit in {@code seq} that can be used.
1762     * @return the Unicode code point or {@code char} value at {@code index} in
1763     *         {@code seq}.
1764     * @throws NullPointerException
1765     *             if {@code seq} is {@code null}.
1766     * @throws IndexOutOfBoundsException
1767     *             if {@code index < 0}, {@code index >= limit},
1768     *             {@code limit < 0} or if {@code limit} is greater than the
1769     *             length of {@code seq}.
1770     * @since 1.5
1771     */
1772    public static int codePointAt(char[] seq, int index, int limit) {
1773        if (index < 0 || index >= limit || limit < 0 || limit > seq.length) {
1774            throw new IndexOutOfBoundsException();
1775        }
1776
1777        char high = seq[index++];
1778        if (index >= limit) {
1779            return high;
1780        }
1781        char low = seq[index];
1782        if (isSurrogatePair(high, low)) {
1783            return toCodePoint(high, low);
1784        }
1785        return high;
1786    }
1787
1788    /**
1789     * Returns the code point that preceds {@code index} in the specified
1790     * sequence of character units. If the unit at {@code index - 1} is a
1791     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1792     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1793     * point represented by the pair is returned; otherwise the {@code char}
1794     * value at {@code index - 1} is returned.
1795     *
1796     * @param seq
1797     *            the source sequence of {@code char} units.
1798     * @param index
1799     *            the position in {@code seq} following the code
1800     *            point that should be returned.
1801     * @return the Unicode code point or {@code char} value before {@code index}
1802     *         in {@code seq}.
1803     * @throws NullPointerException
1804     *             if {@code seq} is {@code null}.
1805     * @throws IndexOutOfBoundsException
1806     *             if the {@code index} is less than 1 or greater than the
1807     *             length of {@code seq}.
1808     * @since 1.5
1809     */
1810    public static int codePointBefore(CharSequence seq, int index) {
1811        if (seq == null) {
1812            throw new NullPointerException();
1813        }
1814        int len = seq.length();
1815        if (index < 1 || index > len) {
1816            throw new IndexOutOfBoundsException();
1817        }
1818
1819        char low = seq.charAt(--index);
1820        if (--index < 0) {
1821            return low;
1822        }
1823        char high = seq.charAt(index);
1824        if (isSurrogatePair(high, low)) {
1825            return toCodePoint(high, low);
1826        }
1827        return low;
1828    }
1829
1830    /**
1831     * Returns the code point that preceds {@code index} in the specified
1832     * array of character units. If the unit at {@code index - 1} is a
1833     * low-surrogate unit, {@code index - 2} is not negative and the unit at
1834     * {@code index - 2} is a high-surrogate unit, then the supplementary code
1835     * point represented by the pair is returned; otherwise the {@code char}
1836     * value at {@code index - 1} is returned.
1837     *
1838     * @param seq
1839     *            the source array of {@code char} units.
1840     * @param index
1841     *            the position in {@code seq} following the code
1842     *            point that should be returned.
1843     * @return the Unicode code point or {@code char} value before {@code index}
1844     *         in {@code seq}.
1845     * @throws NullPointerException
1846     *             if {@code seq} is {@code null}.
1847     * @throws IndexOutOfBoundsException
1848     *             if the {@code index} is less than 1 or greater than the
1849     *             length of {@code seq}.
1850     * @since 1.5
1851     */
1852    public static int codePointBefore(char[] seq, int index) {
1853        if (seq == null) {
1854            throw new NullPointerException();
1855        }
1856        int len = seq.length;
1857        if (index < 1 || index > len) {
1858            throw new IndexOutOfBoundsException();
1859        }
1860
1861        char low = seq[--index];
1862        if (--index < 0) {
1863            return low;
1864        }
1865        char high = seq[index];
1866        if (isSurrogatePair(high, low)) {
1867            return toCodePoint(high, low);
1868        }
1869        return low;
1870    }
1871
1872    /**
1873     * Returns the code point that preceds the {@code index} in the specified
1874     * array of character units and is not less than {@code start}. If the unit
1875     * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not
1876     * less than {@code start} and the unit at {@code index - 2} is a
1877     * high-surrogate unit, then the supplementary code point represented by the
1878     * pair is returned; otherwise the {@code char} value at {@code index - 1}
1879     * is returned.
1880     *
1881     * @param seq
1882     *            the source array of {@code char} units.
1883     * @param index
1884     *            the position in {@code seq} following the code point that
1885     *            should be returned.
1886     * @param start
1887     *            the index of the first element in {@code seq}.
1888     * @return the Unicode code point or {@code char} value before {@code index}
1889     *         in {@code seq}.
1890     * @throws NullPointerException
1891     *             if {@code seq} is {@code null}.
1892     * @throws IndexOutOfBoundsException
1893     *             if the {@code index <= start}, {@code start < 0},
1894     *             {@code index} is greater than the length of {@code seq}, or
1895     *             if {@code start} is equal or greater than the length of
1896     *             {@code seq}.
1897     * @since 1.5
1898     */
1899    public static int codePointBefore(char[] seq, int index, int start) {
1900        if (seq == null) {
1901            throw new NullPointerException();
1902        }
1903        int len = seq.length;
1904        if (index <= start || index > len || start < 0 || start >= len) {
1905            throw new IndexOutOfBoundsException();
1906        }
1907
1908        char low = seq[--index];
1909        if (--index < start) {
1910            return low;
1911        }
1912        char high = seq[index];
1913        if (isSurrogatePair(high, low)) {
1914            return toCodePoint(high, low);
1915        }
1916        return low;
1917    }
1918
1919    /**
1920     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1921     * and copies the value(s) into the char array {@code dst}, starting at
1922     * index {@code dstIndex}.
1923     *
1924     * @param codePoint
1925     *            the Unicode code point to encode.
1926     * @param dst
1927     *            the destination array to copy the encoded value into.
1928     * @param dstIndex
1929     *            the index in {@code dst} from where to start copying.
1930     * @return the number of {@code char} value units copied into {@code dst}.
1931     * @throws IllegalArgumentException
1932     *             if {@code codePoint} is not a valid Unicode code point.
1933     * @throws NullPointerException
1934     *             if {@code dst} is {@code null}.
1935     * @throws IndexOutOfBoundsException
1936     *             if {@code dstIndex} is negative, greater than or equal to
1937     *             {@code dst.length} or equals {@code dst.length - 1} when
1938     *             {@code codePoint} is a
1939     *             {@link #isSupplementaryCodePoint(int) supplementary code point}.
1940     * @since 1.5
1941     */
1942    public static int toChars(int codePoint, char[] dst, int dstIndex) {
1943        if (!isValidCodePoint(codePoint)) {
1944            throw new IllegalArgumentException();
1945        }
1946        if (dst == null) {
1947            throw new NullPointerException();
1948        }
1949        if (dstIndex < 0 || dstIndex >= dst.length) {
1950            throw new IndexOutOfBoundsException();
1951        }
1952
1953        if (isSupplementaryCodePoint(codePoint)) {
1954            if (dstIndex == dst.length - 1) {
1955                throw new IndexOutOfBoundsException();
1956            }
1957            // See RFC 2781, Section 2.1
1958            // http://www.faqs.org/rfcs/rfc2781.html
1959            int cpPrime = codePoint - 0x10000;
1960            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
1961            int low = 0xDC00 | (cpPrime & 0x3FF);
1962            dst[dstIndex] = (char) high;
1963            dst[dstIndex + 1] = (char) low;
1964            return 2;
1965        }
1966
1967        dst[dstIndex] = (char) codePoint;
1968        return 1;
1969    }
1970
1971    /**
1972     * Converts the specified Unicode code point into a UTF-16 encoded sequence
1973     * and returns it as a char array.
1974     *
1975     * @param codePoint
1976     *            the Unicode code point to encode.
1977     * @return the UTF-16 encoded char sequence. If {@code codePoint} is a
1978     *         {@link #isSupplementaryCodePoint(int) supplementary code point},
1979     *         then the returned array contains two characters, otherwise it
1980     *         contains just one character.
1981     * @throws IllegalArgumentException
1982     *             if {@code codePoint} is not a valid Unicode code point.
1983     * @since 1.5
1984     */
1985    public static char[] toChars(int codePoint) {
1986        if (!isValidCodePoint(codePoint)) {
1987            throw new IllegalArgumentException();
1988        }
1989
1990        if (isSupplementaryCodePoint(codePoint)) {
1991            int cpPrime = codePoint - 0x10000;
1992            int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
1993            int low = 0xDC00 | (cpPrime & 0x3FF);
1994            return new char[] { (char) high, (char) low };
1995        }
1996        return new char[] { (char) codePoint };
1997    }
1998
1999    /**
2000     * Counts the number of Unicode code points in the subsequence of the
2001     * specified character sequence, as delineated by {@code beginIndex} and
2002     * {@code endIndex}. Any surrogate values with missing pair values will be
2003     * counted as one code point.
2004     *
2005     * @param seq
2006     *            the {@code CharSequence} to look through.
2007     * @param beginIndex
2008     *            the inclusive index to begin counting at.
2009     * @param endIndex
2010     *            the exclusive index to stop counting at.
2011     * @return the number of Unicode code points.
2012     * @throws NullPointerException
2013     *             if {@code seq} is {@code null}.
2014     * @throws IndexOutOfBoundsException
2015     *             if {@code beginIndex < 0}, {@code beginIndex > endIndex} or
2016     *             if {@code endIndex} is greater than the length of {@code seq}.
2017     * @since 1.5
2018     */
2019    public static int codePointCount(CharSequence seq, int beginIndex,
2020            int endIndex) {
2021        if (seq == null) {
2022            throw new NullPointerException();
2023        }
2024        int len = seq.length();
2025        if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2026            throw new IndexOutOfBoundsException();
2027        }
2028
2029        int result = 0;
2030        for (int i = beginIndex; i < endIndex; i++) {
2031            char c = seq.charAt(i);
2032            if (isHighSurrogate(c)) {
2033                if (++i < endIndex) {
2034                    c = seq.charAt(i);
2035                    if (!isLowSurrogate(c)) {
2036                        result++;
2037                    }
2038                }
2039            }
2040            result++;
2041        }
2042        return result;
2043    }
2044
2045    /**
2046     * Counts the number of Unicode code points in the subsequence of the
2047     * specified char array, as delineated by {@code offset} and {@code count}.
2048     * Any surrogate values with missing pair values will be counted as one code
2049     * point.
2050     *
2051     * @param seq
2052     *            the char array to look through
2053     * @param offset
2054     *            the inclusive index to begin counting at.
2055     * @param count
2056     *            the number of {@code char} values to look through in
2057     *            {@code seq}.
2058     * @return the number of Unicode code points.
2059     * @throws NullPointerException
2060     *             if {@code seq} is {@code null}.
2061     * @throws IndexOutOfBoundsException
2062     *             if {@code offset < 0}, {@code count < 0} or if
2063     *             {@code offset + count} is greater than the length of
2064     *             {@code seq}.
2065     * @since 1.5
2066     */
2067    public static int codePointCount(char[] seq, int offset, int count) {
2068        if (seq == null) {
2069            throw new NullPointerException();
2070        }
2071        int len = seq.length;
2072        int endIndex = offset + count;
2073        if (offset < 0 || count < 0 || endIndex > len) {
2074            throw new IndexOutOfBoundsException();
2075        }
2076
2077        int result = 0;
2078        for (int i = offset; i < endIndex; i++) {
2079            char c = seq[i];
2080            if (isHighSurrogate(c)) {
2081                if (++i < endIndex) {
2082                    c = seq[i];
2083                    if (!isLowSurrogate(c)) {
2084                        result++;
2085                    }
2086                }
2087            }
2088            result++;
2089        }
2090        return result;
2091    }
2092
2093    /**
2094     * Determines the index in the specified character sequence that is offset
2095     * {@code codePointOffset} code points from {@code index}.
2096     *
2097     * @param seq
2098     *            the character sequence to find the index in.
2099     * @param index
2100     *            the start index in {@code seq}.
2101     * @param codePointOffset
2102     *            the number of code points to look backwards or forwards; may
2103     *            be a negative or positive value.
2104     * @return the index in {@code seq} that is {@code codePointOffset} code
2105     *         points away from {@code index}.
2106     * @throws NullPointerException
2107     *             if {@code seq} is {@code null}.
2108     * @throws IndexOutOfBoundsException
2109     *             if {@code index < 0}, {@code index} is greater than the
2110     *             length of {@code seq}, or if there are not enough values in
2111     *             {@code seq} to skip {@code codePointOffset} code points
2112     *             forwards or backwards (if {@code codePointOffset} is
2113     *             negative) from {@code index}.
2114     * @since 1.5
2115     */
2116    public static int offsetByCodePoints(CharSequence seq, int index,
2117            int codePointOffset) {
2118        if (seq == null) {
2119            throw new NullPointerException();
2120        }
2121        int len = seq.length();
2122        if (index < 0 || index > len) {
2123            throw new IndexOutOfBoundsException();
2124        }
2125
2126        if (codePointOffset == 0) {
2127            return index;
2128        }
2129
2130        if (codePointOffset > 0) {
2131            int codePoints = codePointOffset;
2132            int i = index;
2133            while (codePoints > 0) {
2134                codePoints--;
2135                if (i >= len) {
2136                    throw new IndexOutOfBoundsException();
2137                }
2138                if (isHighSurrogate(seq.charAt(i))) {
2139                    int next = i + 1;
2140                    if (next < len && isLowSurrogate(seq.charAt(next))) {
2141                        i++;
2142                    }
2143                }
2144                i++;
2145            }
2146            return i;
2147        }
2148
2149        assert codePointOffset < 0;
2150        int codePoints = -codePointOffset;
2151        int i = index;
2152        while (codePoints > 0) {
2153            codePoints--;
2154            i--;
2155            if (i < 0) {
2156                throw new IndexOutOfBoundsException();
2157            }
2158            if (isLowSurrogate(seq.charAt(i))) {
2159                int prev = i - 1;
2160                if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2161                    i--;
2162                }
2163            }
2164        }
2165        return i;
2166    }
2167
2168    /**
2169     * Determines the index in a subsequence of the specified character array
2170     * that is offset {@code codePointOffset} code points from {@code index}.
2171     * The subsequence is delineated by {@code start} and {@code count}.
2172     *
2173     * @param seq
2174     *            the character array to find the index in.
2175     * @param start
2176     *            the inclusive index that marks the beginning of the
2177     *            subsequence.
2178     * @param count
2179     *            the number of {@code char} values to include within the
2180     *            subsequence.
2181     * @param index
2182     *            the start index in the subsequence of the char array.
2183     * @param codePointOffset
2184     *            the number of code points to look backwards or forwards; may
2185     *            be a negative or positive value.
2186     * @return the index in {@code seq} that is {@code codePointOffset} code
2187     *         points away from {@code index}.
2188     * @throws NullPointerException
2189     *             if {@code seq} is {@code null}.
2190     * @throws IndexOutOfBoundsException
2191     *             if {@code start < 0}, {@code count < 0},
2192     *             {@code index < start}, {@code index > start + count},
2193     *             {@code start + count} is greater than the length of
2194     *             {@code seq}, or if there are not enough values in
2195     *             {@code seq} to skip {@code codePointOffset} code points
2196     *             forward or backward (if {@code codePointOffset} is
2197     *             negative) from {@code index}.
2198     * @since 1.5
2199     */
2200    public static int offsetByCodePoints(char[] seq, int start, int count,
2201            int index, int codePointOffset) {
2202        if (seq == null) {
2203            throw new NullPointerException();
2204        }
2205        int end = start + count;
2206        if (start < 0 || count < 0 || end > seq.length || index < start
2207                || index > end) {
2208            throw new IndexOutOfBoundsException();
2209        }
2210
2211        if (codePointOffset == 0) {
2212            return index;
2213        }
2214
2215        if (codePointOffset > 0) {
2216            int codePoints = codePointOffset;
2217            int i = index;
2218            while (codePoints > 0) {
2219                codePoints--;
2220                if (i >= end) {
2221                    throw new IndexOutOfBoundsException();
2222                }
2223                if (isHighSurrogate(seq[i])) {
2224                    int next = i + 1;
2225                    if (next < end && isLowSurrogate(seq[next])) {
2226                        i++;
2227                    }
2228                }
2229                i++;
2230            }
2231            return i;
2232        }
2233
2234        assert codePointOffset < 0;
2235        int codePoints = -codePointOffset;
2236        int i = index;
2237        while (codePoints > 0) {
2238            codePoints--;
2239            i--;
2240            if (i < start) {
2241                throw new IndexOutOfBoundsException();
2242            }
2243            if (isLowSurrogate(seq[i])) {
2244                int prev = i - 1;
2245                if (prev >= start && isHighSurrogate(seq[prev])) {
2246                    i--;
2247                }
2248            }
2249        }
2250        return i;
2251    }
2252
2253    /**
2254     * Convenience method to determine the value of the specified character
2255     * {@code c} in the supplied radix. The value of {@code radix} must be
2256     * between MIN_RADIX and MAX_RADIX.
2257     *
2258     * @param c
2259     *            the character to determine the value of.
2260     * @param radix
2261     *            the radix.
2262     * @return the value of {@code c} in {@code radix} if {@code radix} lies
2263     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
2264     */
2265    public static int digit(char c, int radix) {
2266        // BEGIN android-changed
2267        // if (radix >= MIN_RADIX && radix <= MAX_RADIX) {
2268        //     if (c < 128) {
2269        //         // Optimized for ASCII
2270        //         int result = -1;
2271        //         if ('0' <= c && c <= '9') {
2272        //             result = c - '0';
2273        //         } else if ('a' <= c && c <= 'z') {
2274        //             result = c - ('a' - 10);
2275        //         } else if ('A' <= c && c <= 'Z') {
2276        //             result = c - ('A' - 10);
2277        //         }
2278        //         return result < radix ? result : -1;
2279        //     }
2280        //     int result = BinarySearch.binarySearchRange(digitKeys, c);
2281        //     if (result >= 0 && c <= digitValues[result * 2]) {
2282        //         int value = (char) (c - digitValues[result * 2 + 1]);
2283        //         if (value >= radix) {
2284        //             return -1;
2285        //         }
2286        //         return value;
2287        //     }
2288        // }
2289        // return -1;
2290        return UCharacter.digit(c, radix);
2291        // ENd android-changed
2292    }
2293
2294    /**
2295     * Convenience method to determine the value of the character
2296     * {@code codePoint} in the supplied radix. The value of {@code radix} must
2297     * be between MIN_RADIX and MAX_RADIX.
2298     *
2299     * @param codePoint
2300     *            the character, including supplementary characters.
2301     * @param radix
2302     *            the radix.
2303     * @return if {@code radix} lies between {@link #MIN_RADIX} and
2304     *         {@link #MAX_RADIX} then the value of the character in the radix;
2305     *         -1 otherwise.
2306     */
2307    public static int digit(int codePoint, int radix) {
2308        return UCharacter.digit(codePoint, radix);
2309    }
2310
2311    /**
2312     * Compares this object with the specified object and indicates if they are
2313     * equal. In order to be equal, {@code object} must be an instance of
2314     * {@code Character} and have the same char value as this object.
2315     *
2316     * @param object
2317     *            the object to compare this double with.
2318     * @return {@code true} if the specified object is equal to this
2319     *         {@code Character}; {@code false} otherwise.
2320     */
2321    @Override
2322    public boolean equals(Object object) {
2323        return (object instanceof Character)
2324                && (value == ((Character) object).value);
2325    }
2326
2327    /**
2328     * Returns the character which represents the specified digit in the
2329     * specified radix. The {@code radix} must be between {@code MIN_RADIX} and
2330     * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and
2331     * smaller than {@code radix}. If any of these conditions does not hold, 0
2332     * is returned.
2333     *
2334     * @param digit
2335     *            the integer value.
2336     * @param radix
2337     *            the radix.
2338     * @return the character which represents the {@code digit} in the
2339     *         {@code radix}.
2340     */
2341    public static char forDigit(int digit, int radix) {
2342        if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
2343            if (0 <= digit && digit < radix) {
2344                return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10);
2345            }
2346        }
2347        return 0;
2348    }
2349
2350    /**
2351     * Gets the numeric value of the specified Unicode character.
2352     *
2353     * @param c
2354     *            the Unicode character to get the numeric value of.
2355     * @return a non-negative numeric integer value if a numeric value for
2356     *         {@code c} exists, -1 if there is no numeric value for {@code c},
2357     *         -2 if the numeric value can not be represented with an integer.
2358     */
2359    public static int getNumericValue(char c) {
2360        // BEGIN android-changed
2361        // if (c < 128) {
2362        //     // Optimized for ASCII
2363        //     if (c >= '0' && c <= '9') {
2364        //         return c - '0';
2365        //     }
2366        //     if (c >= 'a' && c <= 'z') {
2367        //         return c - ('a' - 10);
2368        //     }
2369        //     if (c >= 'A' && c <= 'Z') {
2370        //         return c - ('A' - 10);
2371        //     }
2372        //     return -1;
2373        // }
2374        // int result = BinarySearch.binarySearchRange(numericKeys, c);
2375        // if (result >= 0 && c <= numericValues[result * 2]) {
2376        //     char difference = numericValues[result * 2 + 1];
2377        //     if (difference == 0) {
2378        //         return -2;
2379        //     }
2380        //     // Value is always positive, must be negative value
2381        //     if (difference > c) {
2382        //         return c - (short) difference;
2383        //     }
2384        //     return c - difference;
2385        // }
2386        // return -1;
2387        return UCharacter.getNumericValue(c);
2388        // END android-changed
2389    }
2390
2391    /**
2392     * Gets the numeric value of the specified Unicode code point. For example,
2393     * the code point '\u216B' stands for the Roman number XII, which has the
2394     * numeric value 12.
2395     *
2396     * @param codePoint
2397     *            the Unicode code point to get the numeric value of.
2398     * @return a non-negative numeric integer value if a numeric value for
2399     *         {@code codePoint} exists, -1 if there is no numeric value for
2400     *         {@code codePoint}, -2 if the numeric value can not be
2401     *         represented with an integer.
2402     */
2403    public static int getNumericValue(int codePoint) {
2404        return UCharacter.getNumericValue(codePoint);
2405    }
2406
2407    /**
2408     * Gets the general Unicode category of the specified character.
2409     *
2410     * @param c
2411     *            the character to get the category of.
2412     * @return the Unicode category of {@code c}.
2413     */
2414    public static int getType(char c) {
2415        // BEGIN android-changed
2416        // if(c < 1000) {
2417        //     return typeValuesCache[(int)c];
2418        // }
2419        // int result = BinarySearch.binarySearchRange(typeKeys, c);
2420        // int high = typeValues[result * 2];
2421        // if (c <= high) {
2422        //     int code = typeValues[result * 2 + 1];
2423        //     if (code < 0x100) {
2424        //         return code;
2425        //     }
2426        //     return (c & 1) == 1 ? code >> 8 : code & 0xff;
2427        // }
2428        // return UNASSIGNED;
2429        return getType((int) c);
2430        // END android-changed
2431    }
2432
2433    /**
2434     * Gets the general Unicode category of the specified code point.
2435     *
2436     * @param codePoint
2437     *            the Unicode code point to get the category of.
2438     * @return the Unicode category of {@code codePoint}.
2439     */
2440    public static int getType(int codePoint) {
2441        // BEGIN android-changed
2442    	// if (codePoint < 1000 && codePoint > 0) {
2443    	//     return typeValuesCache[codePoint];
2444    	// }
2445        // END android-changed
2446        int type = UCharacter.getType(codePoint);
2447
2448        // the type values returned by UCharacter are not compatible with what
2449        // the spec says.RI's Character type values skip the value 17.
2450        if (type <= Character.FORMAT) {
2451            return type;
2452        }
2453        return (type + 1);
2454    }
2455
2456    /**
2457     * Gets the Unicode directionality of the specified character.
2458     *
2459     * @param c
2460     *            the character to get the directionality of.
2461     * @return the Unicode directionality of {@code c}.
2462     */
2463    public static byte getDirectionality(char c) {
2464        // BEGIN android-changed
2465        // int result = BinarySearch.binarySearchRange(bidiKeys, c);
2466        // int high = bidiValues[result * 2];
2467        // if (c <= high) {
2468        //     int code = bidiValues[result * 2 + 1];
2469        //     if (code < 0x100) {
2470        //         return (byte) (code - 1);
2471        //     }
2472        //     return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1);
2473        // }
2474        // return DIRECTIONALITY_UNDEFINED;
2475        return getDirectionality((int)c);
2476        // END android-changed
2477    }
2478
2479    /**
2480     * Gets the Unicode directionality of the specified character.
2481     *
2482     * @param codePoint
2483     *            the Unicode code point to get the directionality of.
2484     * @return the Unicode directionality of {@code codePoint}.
2485     */
2486    public static byte getDirectionality(int codePoint) {
2487        if (getType(codePoint) == Character.UNASSIGNED) {
2488            return Character.DIRECTIONALITY_UNDEFINED;
2489        }
2490
2491        byte UCDirectionality = UCharacter.getDirectionality(codePoint);
2492        if (UCDirectionality == -1) {
2493            return -1;
2494        }
2495        return DIRECTIONALITY[UCDirectionality];
2496    }
2497
2498    /**
2499     * Indicates whether the specified character is mirrored.
2500     *
2501     * @param c
2502     *            the character to check.
2503     * @return {@code true} if {@code c} is mirrored; {@code false}
2504     *         otherwise.
2505     */
2506    public static boolean isMirrored(char c) {
2507        // BEGIN android-changed
2508        // int value = c / 16;
2509        // if (value >= mirrored.length) {
2510        //     return false;
2511        // }
2512        // int bit = 1 << (c % 16);
2513        // return (mirrored[value] & bit) != 0;
2514        return isMirrored((int)c);
2515        // ENd android-changed
2516    }
2517
2518    /**
2519     * Indicates whether the specified code point is mirrored.
2520     *
2521     * @param codePoint
2522     *            the code point to check.
2523     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
2524     *         otherwise.
2525     */
2526    public static boolean isMirrored(int codePoint) {
2527        return UCharacter.isMirrored(codePoint);
2528    }
2529
2530    @Override
2531    public int hashCode() {
2532        return value;
2533    }
2534
2535    /**
2536     * Indicates whether the specified character is defined in the Unicode
2537     * specification.
2538     *
2539     * @param c
2540     *            the character to check.
2541     * @return {@code true} if the general Unicode category of the character is
2542     *         not {@code UNASSIGNED}; {@code false} otherwise.
2543     */
2544    public static boolean isDefined(char c) {
2545        // BEGIN android-changed
2546        // return getType(c) != UNASSIGNED;
2547        return UCharacter.isDefined(c);
2548        // END android-changed
2549    }
2550
2551    /**
2552     * Indicates whether the specified code point is defined in the Unicode
2553     * specification.
2554     *
2555     * @param codePoint
2556     *            the code point to check.
2557     * @return {@code true} if the general Unicode category of the code point is
2558     *         not {@code UNASSIGNED}; {@code false} otherwise.
2559     */
2560    public static boolean isDefined(int codePoint) {
2561        return UCharacter.isDefined(codePoint);
2562    }
2563
2564    /**
2565     * Indicates whether the specified character is a digit.
2566     *
2567     * @param c
2568     *            the character to check.
2569     * @return {@code true} if {@code c} is a digit; {@code false}
2570     *         otherwise.
2571     */
2572    public static boolean isDigit(char c) {
2573        // Optimized case for ASCII
2574        if ('0' <= c && c <= '9') {
2575            return true;
2576        }
2577        if (c < 1632) {
2578            return false;
2579        }
2580        // BEGIN android-changed
2581        return UCharacter.isDigit(c);
2582        // END android-changed
2583    }
2584
2585    /**
2586     * Indicates whether the specified code point is a digit.
2587     *
2588     * @param codePoint
2589     *            the code point to check.
2590     * @return {@code true} if {@code codePoint} is a digit; {@code false}
2591     *         otherwise.
2592     */
2593    public static boolean isDigit(int codePoint) {
2594        return UCharacter.isDigit(codePoint);
2595    }
2596
2597    /**
2598     * Indicates whether the specified character is ignorable in a Java or
2599     * Unicode identifier.
2600     *
2601     * @param c
2602     *            the character to check.
2603     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
2604     */
2605    public static boolean isIdentifierIgnorable(char c) {
2606        // BEGIN android-changed
2607        // return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b)
2608        //         || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT;
2609        return UCharacter.isIdentifierIgnorable(c);
2610        // END android-changed
2611    }
2612
2613    /**
2614     * Indicates whether the specified code point is ignorable in a Java or
2615     * Unicode identifier.
2616     *
2617     * @param codePoint
2618     *            the code point to check.
2619     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
2620     *         otherwise.
2621     */
2622    public static boolean isIdentifierIgnorable(int codePoint) {
2623        return UCharacter.isIdentifierIgnorable(codePoint);
2624    }
2625
2626    /**
2627     * Indicates whether the specified character is an ISO control character.
2628     *
2629     * @param c
2630     *            the character to check.
2631     * @return {@code true} if {@code c} is an ISO control character;
2632     *         {@code false} otherwise.
2633     */
2634    public static boolean isISOControl(char c) {
2635        return isISOControl((int)c);
2636    }
2637
2638    /**
2639     * Indicates whether the specified code point is an ISO control character.
2640     *
2641     * @param c
2642     *            the code point to check.
2643     * @return {@code true} if {@code c} is an ISO control character;
2644     *         {@code false} otherwise.
2645     */
2646    public static boolean isISOControl(int c) {
2647        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
2648    }
2649
2650    /**
2651     * Indicates whether the specified character is a valid part of a Java
2652     * identifier other than the first character.
2653     *
2654     * @param c
2655     *            the character to check.
2656     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2657     *         {@code false} otherwise.
2658     */
2659    public static boolean isJavaIdentifierPart(char c) {
2660        // Optimized case for ASCII
2661        if (c < 128) {
2662            return (typeTags[c] & ISJAVAPART) != 0;
2663        }
2664
2665        int type = getType(c);
2666        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2667                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2668                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2669                || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
2670                || (c >= 0x80 && c <= 0x9f) || type == FORMAT;
2671    }
2672
2673    /**
2674     * Indicates whether the specified code point is a valid part of a Java
2675     * identifier other than the first character.
2676     *
2677     * @param codePoint
2678     *            the code point to check.
2679     * @return {@code true} if {@code c} is valid as part of a Java identifier;
2680     *         {@code false} otherwise.
2681     */
2682    public static boolean isJavaIdentifierPart(int codePoint) {
2683        int type = getType(codePoint);
2684        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2685                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2686                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2687                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
2688                || isIdentifierIgnorable(codePoint);
2689    }
2690
2691    /**
2692     * Indicates whether the specified character is a valid first character for
2693     * a Java identifier.
2694     *
2695     * @param c
2696     *            the character to check.
2697     * @return {@code true} if {@code c} is a valid first character of a Java
2698     *         identifier; {@code false} otherwise.
2699     */
2700    public static boolean isJavaIdentifierStart(char c) {
2701        // Optimized case for ASCII
2702        if (c < 128) {
2703            return (typeTags[c] & ISJAVASTART) != 0;
2704        }
2705
2706        int type = getType(c);
2707        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2708                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
2709                || type == LETTER_NUMBER;
2710    }
2711
2712    /**
2713     * Indicates whether the specified code point is a valid start for a Java
2714     * identifier.
2715     *
2716     * @param codePoint
2717     *            the code point to check.
2718     * @return {@code true} if {@code codePoint} is a valid start of a Java
2719     *         identifier; {@code false} otherwise.
2720     */
2721    public static boolean isJavaIdentifierStart(int codePoint) {
2722        int type = getType(codePoint);
2723        return isLetter(codePoint) || type == CURRENCY_SYMBOL
2724                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
2725    }
2726
2727    /**
2728     * Indicates whether the specified character is a Java letter.
2729     *
2730     * @param c
2731     *            the character to check.
2732     * @return {@code true} if {@code c} is a Java letter; {@code false}
2733     *         otherwise.
2734     * @deprecated Use {@link #isJavaIdentifierStart(char)}
2735     */
2736    @Deprecated
2737    public static boolean isJavaLetter(char c) {
2738        return isJavaIdentifierStart(c);
2739    }
2740
2741    /**
2742     * Indicates whether the specified character is a Java letter or digit
2743     * character.
2744     *
2745     * @param c
2746     *            the character to check.
2747     * @return {@code true} if {@code c} is a Java letter or digit;
2748     *         {@code false} otherwise.
2749     * @deprecated Use {@link #isJavaIdentifierPart(char)}
2750     */
2751    @Deprecated
2752    public static boolean isJavaLetterOrDigit(char c) {
2753        return isJavaIdentifierPart(c);
2754    }
2755
2756    /**
2757     * Indicates whether the specified character is a letter.
2758     *
2759     * @param c
2760     *            the character to check.
2761     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
2762     */
2763    public static boolean isLetter(char c) {
2764        // BEGIN android-changed
2765        // if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
2766        //     return true;
2767        // }
2768        // if (c < 128) {
2769        //     return false;
2770        // }
2771        // int type = getType(c);
2772        // return type >= UPPERCASE_LETTER && type <= OTHER_LETTER;
2773        return UCharacter.isLetter(c);
2774        // END android-changed
2775    }
2776
2777    /**
2778     * Indicates whether the specified code point is a letter.
2779     *
2780     * @param codePoint
2781     *            the code point to check.
2782     * @return {@code true} if {@code codePoint} is a letter; {@code false}
2783     *         otherwise.
2784     */
2785    public static boolean isLetter(int codePoint) {
2786        return UCharacter.isLetter(codePoint);
2787    }
2788
2789    /**
2790     * Indicates whether the specified character is a letter or a digit.
2791     *
2792     * @param c
2793     *            the character to check.
2794     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
2795     *         otherwise.
2796     */
2797    public static boolean isLetterOrDigit(char c) {
2798        // BEGIN android-changed
2799        // int type = getType(c);
2800        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2801        //         || type == DECIMAL_DIGIT_NUMBER;
2802        return UCharacter.isLetterOrDigit(c);
2803        // END andorid-changed
2804    }
2805
2806    /**
2807     * Indicates whether the specified code point is a letter or a digit.
2808     *
2809     * @param codePoint
2810     *            the code point to check.
2811     * @return {@code true} if {@code codePoint} is a letter or a digit;
2812     *         {@code false} otherwise.
2813     */
2814    public static boolean isLetterOrDigit(int codePoint) {
2815        return UCharacter.isLetterOrDigit(codePoint);
2816    }
2817
2818    /**
2819     * Indicates whether the specified character is a lower case letter.
2820     *
2821     * @param c
2822     *            the character to check.
2823     * @return {@code true} if {@code c} is a lower case letter; {@code false}
2824     *         otherwise.
2825     */
2826    public static boolean isLowerCase(char c) {
2827        // BEGIN android-changed
2828        // // Optimized case for ASCII
2829        // if ('a' <= c && c <= 'z') {
2830        //     return true;
2831        // }
2832        // if (c < 128) {
2833        //     return false;
2834        // }
2835        //
2836        // return getType(c) == LOWERCASE_LETTER;
2837        return UCharacter.isLowerCase(c);
2838        // END android-changed
2839    }
2840
2841    /**
2842     * Indicates whether the specified code point is a lower case letter.
2843     *
2844     * @param codePoint
2845     *            the code point to check.
2846     * @return {@code true} if {@code codePoint} is a lower case letter;
2847     *         {@code false} otherwise.
2848     */
2849    public static boolean isLowerCase(int codePoint) {
2850        return UCharacter.isLowerCase(codePoint);
2851    }
2852
2853    /**
2854     * Indicates whether the specified character is a Java space.
2855     *
2856     * @param c
2857     *            the character to check.
2858     * @return {@code true} if {@code c} is a Java space; {@code false}
2859     *         otherwise.
2860     * @deprecated Use {@link #isWhitespace(char)}
2861     */
2862    @Deprecated
2863    public static boolean isSpace(char c) {
2864        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
2865    }
2866
2867    /**
2868     * Indicates whether the specified character is a Unicode space character.
2869     * That is, if it is a member of one of the Unicode categories Space
2870     * Separator, Line Separator, or Paragraph Separator.
2871     *
2872     * @param c
2873     *            the character to check.
2874     * @return {@code true} if {@code c} is a Unicode space character,
2875     *         {@code false} otherwise.
2876     */
2877    public static boolean isSpaceChar(char c) {
2878        // BEGIN android-changed
2879        // if (c == 0x20 || c == 0xa0 || c == 0x1680) {
2880        //     return true;
2881        // }
2882        // if (c < 0x2000) {
2883        //     return false;
2884        // }
2885        // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f
2886        //         || c == 0x3000;
2887        return UCharacter.isSpaceChar(c);
2888        // END android-changed
2889    }
2890
2891    /**
2892     * Indicates whether the specified code point is a Unicode space character.
2893     * That is, if it is a member of one of the Unicode categories Space
2894     * Separator, Line Separator, or Paragraph Separator.
2895     *
2896     * @param codePoint
2897     *            the code point to check.
2898     * @return {@code true} if {@code codePoint} is a Unicode space character,
2899     *         {@code false} otherwise.
2900     */
2901    public static boolean isSpaceChar(int codePoint) {
2902        return UCharacter.isSpaceChar(codePoint);
2903    }
2904
2905    /**
2906     * Indicates whether the specified character is a titlecase character.
2907     *
2908     * @param c
2909     *            the character to check.
2910     * @return {@code true} if {@code c} is a titlecase character, {@code false}
2911     *         otherwise.
2912     */
2913    public static boolean isTitleCase(char c) {
2914        // BEGIN android-changed
2915        // if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb' || c == '\u01f2') {
2916        //     return true;
2917        // }
2918        // if (c >= '\u1f88' && c <= '\u1ffc') {
2919        //     // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf
2920        //     if (c > '\u1faf') {
2921        //         return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc';
2922        //     }
2923        //     int last = c & 0xf;
2924        //     return last >= 8 && last <= 0xf;
2925        // }
2926        // return false;
2927        return UCharacter.isTitleCase(c);
2928        // END android-changed
2929    }
2930
2931    /**
2932     * Indicates whether the specified code point is a titlecase character.
2933     *
2934     * @param codePoint
2935     *            the code point to check.
2936     * @return {@code true} if {@code codePoint} is a titlecase character,
2937     *         {@code false} otherwise.
2938     */
2939    public static boolean isTitleCase(int codePoint) {
2940        return UCharacter.isTitleCase(codePoint);
2941    }
2942
2943    /**
2944     * Indicates whether the specified character is valid as part of a Unicode
2945     * identifier other than the first character.
2946     *
2947     * @param c
2948     *            the character to check.
2949     * @return {@code true} if {@code c} is valid as part of a Unicode
2950     *         identifier; {@code false} otherwise.
2951     */
2952    public static boolean isUnicodeIdentifierPart(char c) {
2953        // BEGIN android-changed
2954        // int type = getType(c);
2955        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2956        //         || type == CONNECTOR_PUNCTUATION
2957        //         || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
2958        //         || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
2959        //         || isIdentifierIgnorable(c);
2960        return UCharacter.isUnicodeIdentifierPart(c);
2961        // END android-changed
2962    }
2963
2964    /**
2965     * Indicates whether the specified code point is valid as part of a Unicode
2966     * identifier other than the first character.
2967     *
2968     * @param codePoint
2969     *            the code point to check.
2970     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
2971     *         identifier; {@code false} otherwise.
2972     */
2973    public static boolean isUnicodeIdentifierPart(int codePoint) {
2974        return UCharacter.isUnicodeIdentifierPart(codePoint);
2975    }
2976
2977    /**
2978     * Indicates whether the specified character is a valid initial character
2979     * for a Unicode identifier.
2980     *
2981     * @param c
2982     *            the character to check.
2983     * @return {@code true} if {@code c} is a valid first character for a
2984     *         Unicode identifier; {@code false} otherwise.
2985     */
2986    public static boolean isUnicodeIdentifierStart(char c) {
2987        // BEGIN android-changed
2988        // int type = getType(c);
2989        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
2990        //         || type == LETTER_NUMBER;
2991        return UCharacter.isUnicodeIdentifierStart(c);
2992        // END android-changed
2993    }
2994
2995    /**
2996     * Indicates whether the specified code point is a valid initial character
2997     * for a Unicode identifier.
2998     *
2999     * @param codePoint
3000     *            the code point to check.
3001     * @return {@code true} if {@code codePoint} is a valid first character for
3002     *         a Unicode identifier; {@code false} otherwise.
3003     */
3004    public static boolean isUnicodeIdentifierStart(int codePoint) {
3005        return UCharacter.isUnicodeIdentifierStart(codePoint);
3006    }
3007
3008    /**
3009     * Indicates whether the specified character is an upper case letter.
3010     *
3011     * @param c
3012     *            the character to check.
3013     * @return {@code true} if {@code c} is a upper case letter; {@code false}
3014     *         otherwise.
3015     */
3016    public static boolean isUpperCase(char c) {
3017        // Optimized case for ASCII
3018        if ('A' <= c && c <= 'Z') {
3019            return true;
3020        }
3021        if (c < 128) {
3022            return false;
3023        }
3024        // BEGIN android-changed
3025        return UCharacter.isUpperCase(c);
3026        // END android-changed
3027    }
3028
3029    /**
3030     * Indicates whether the specified code point is an upper case letter.
3031     *
3032     * @param codePoint
3033     *            the code point to check.
3034     * @return {@code true} if {@code codePoint} is a upper case letter;
3035     *         {@code false} otherwise.
3036     */
3037    public static boolean isUpperCase(int codePoint) {
3038        return UCharacter.isUpperCase(codePoint);
3039    }
3040
3041    /**
3042     * Indicates whether the specified character is a whitespace character in
3043     * Java.
3044     *
3045     * @param c
3046     *            the character to check.
3047     * @return {@code true} if the supplied {@code c} is a whitespace character
3048     *         in Java; {@code false} otherwise.
3049     */
3050    public static boolean isWhitespace(char c) {
3051        // BEGIN android-changed
3052        // // Optimized case for ASCII
3053        // if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) {
3054        //     return true;
3055        // }
3056        // if (c == 0x1680) {
3057        //     return true;
3058        // }
3059        // if (c < 0x2000 || c == 0x2007) {
3060        //     return false;
3061        // }
3062        // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000;
3063        return UCharacter.isWhitespace(c);
3064        // END android-changed
3065    }
3066
3067    /**
3068     * Indicates whether the specified code point is a whitespace character in
3069     * Java.
3070     *
3071     * @param codePoint
3072     *            the code point to check.
3073     * @return {@code true} if the supplied {@code c} is a whitespace character
3074     *         in Java; {@code false} otherwise.
3075     */
3076    public static boolean isWhitespace(int codePoint) {
3077        //FIXME depends on ICU when the codePoint is '\u2007'
3078        return UCharacter.isWhitespace(codePoint);
3079
3080    }
3081
3082    /**
3083     * Reverses the order of the first and second byte in the specified
3084     * character.
3085     *
3086     * @param c
3087     *            the character to reverse.
3088     * @return the character with reordered bytes.
3089     */
3090    public static char reverseBytes(char c) {
3091        return (char)((c<<8) | (c>>8));
3092    }
3093
3094    /**
3095     * Returns the lower case equivalent for the specified character if the
3096     * character is an upper case letter. Otherwise, the specified character is
3097     * returned unchanged.
3098     *
3099     * @param c
3100     *            the character
3101     * @return if {@code c} is an upper case character then its lower case
3102     *         counterpart, otherwise just {@code c}.
3103     */
3104    public static char toLowerCase(char c) {
3105        // BEGIN android-changed
3106        // // Optimized case for ASCII
3107        // if ('A' <= c && c <= 'Z') {
3108        //     return (char) (c + ('a' - 'A'));
3109        // }
3110        // if (c < 192) {// || c == 215 || (c > 222 && c < 256)) {
3111        //     return c;
3112        // }
3113        // if (c<1000) {
3114        //     return (char)lowercaseValuesCache[c-192];
3115        // }
3116        //
3117        // int result = BinarySearch.binarySearchRange(lowercaseKeys, c);
3118        // if (result >= 0) {
3119        //     boolean by2 = false;
3120        //     char start = lowercaseKeys.charAt(result);
3121        //     char end = lowercaseValues[result * 2];
3122        //     if ((start & 0x8000) != (end & 0x8000)) {
3123        //         end ^= 0x8000;
3124        //         by2 = true;
3125        //     }
3126        //     if (c <= end) {
3127        //         if (by2 && (c & 1) != (start & 1)) {
3128        //             return c;
3129        //         }
3130        //         char mapping = lowercaseValues[result * 2 + 1];
3131        //         return (char) (c + mapping);
3132        //     }
3133        // }
3134        // return c;
3135        return (char)UCharacter.toLowerCase(c);
3136        // END android-changed
3137    }
3138
3139    /**
3140     * Returns the lower case equivalent for the specified code point if it is
3141     * an upper case letter. Otherwise, the specified code point is returned
3142     * unchanged.
3143     *
3144     * @param codePoint
3145     *            the code point to check.
3146     * @return if {@code codePoint} is an upper case character then its lower
3147     *         case counterpart, otherwise just {@code codePoint}.
3148     */
3149    public static int toLowerCase(int codePoint) {
3150        return UCharacter.toLowerCase(codePoint);
3151    }
3152
3153    @Override
3154    public String toString() {
3155        return String.valueOf(value);
3156    }
3157
3158    /**
3159     * Converts the specified character to its string representation.
3160     *
3161     * @param value
3162     *            the character to convert.
3163     * @return the character converted to a string.
3164     */
3165    public static String toString(char value) {
3166        return String.valueOf(value);
3167    }
3168
3169    /**
3170     * Returns the title case equivalent for the specified character if it
3171     * exists. Otherwise, the specified character is returned unchanged.
3172     *
3173     * @param c
3174     *            the character to convert.
3175     * @return the title case equivalent of {@code c} if it exists, otherwise
3176     *         {@code c}.
3177     */
3178    public static char toTitleCase(char c) {
3179        // BEGIN android-changed
3180        // if (isTitleCase(c)) {
3181        //     return c;
3182        // }
3183        // int result = BinarySearch.binarySearch(titlecaseKeys, c);
3184        // if (result >= 0) {
3185        //     return titlecaseValues[result];
3186        // }
3187        // return toUpperCase(c);
3188        return (char)UCharacter.toTitleCase(c);
3189        // ENd android-changed
3190    }
3191
3192    /**
3193     * Returns the title case equivalent for the specified code point if it
3194     * exists. Otherwise, the specified code point is returned unchanged.
3195     *
3196     * @param codePoint
3197     *            the code point to convert.
3198     * @return the title case equivalent of {@code codePoint} if it exists,
3199     *         otherwise {@code codePoint}.
3200     */
3201    public static int toTitleCase(int codePoint) {
3202        return UCharacter.toTitleCase(codePoint);
3203    }
3204
3205    /**
3206     * Returns the upper case equivalent for the specified character if the
3207     * character is a lower case letter. Otherwise, the specified character is
3208     * returned unchanged.
3209     *
3210     * @param c
3211     *            the character to convert.
3212     * @return if {@code c} is a lower case character then its upper case
3213     *         counterpart, otherwise just {@code c}.
3214     */
3215    public static char toUpperCase(char c) {
3216        // BEGIN android-changed
3217        // // Optimized case for ASCII
3218        // if ('a' <= c && c <= 'z') {
3219        //     return (char) (c - ('a' - 'A'));
3220        // }
3221        // if (c < 181) {
3222        //     return c;
3223        // }
3224        // if (c<1000) {
3225        //     return (char)uppercaseValuesCache[(int)c-181];
3226        // }
3227        // int result = BinarySearch.binarySearchRange(uppercaseKeys, c);
3228        // if (result >= 0) {
3229        //     boolean by2 = false;
3230        //     char start = uppercaseKeys.charAt(result);
3231        //     char end = uppercaseValues[result * 2];
3232        //     if ((start & 0x8000) != (end & 0x8000)) {
3233        //         end ^= 0x8000;
3234        //         by2 = true;
3235        //     }
3236        //     if (c <= end) {
3237        //         if (by2 && (c & 1) != (start & 1)) {
3238        //             return c;
3239        //         }
3240        //         char mapping = uppercaseValues[result * 2 + 1];
3241        //         return (char) (c + mapping);
3242        //     }
3243        // }
3244        // return c;
3245        return (char)UCharacter.toUpperCase(c);
3246        // END android-changed
3247    }
3248
3249    /**
3250     * Returns the upper case equivalent for the specified code point if the
3251     * code point is a lower case letter. Otherwise, the specified code point is
3252     * returned unchanged.
3253     *
3254     * @param codePoint
3255     *            the code point to convert.
3256     * @return if {@code codePoint} is a lower case character then its upper
3257     *         case counterpart, otherwise just {@code codePoint}.
3258     */
3259    public static int toUpperCase(int codePoint) {
3260        return UCharacter.toUpperCase(codePoint);
3261    }
3262}
3263