Character.java revision e09f203cc6322441d0b1d3c75fe1e191d1ed0386
1/*
2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import dalvik.annotation.optimization.FastNative;
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.Locale;
32import java.util.Map;
33
34// Android-changed: Remove reference to a specific unicode standard version
35/**
36 * The {@code Character} class wraps a value of the primitive
37 * type {@code char} in an object. An object of type
38 * {@code Character} contains a single field whose type is
39 * {@code char}.
40 * <p>
41 * In addition, this class provides several methods for determining
42 * a character's category (lowercase letter, digit, etc.) and for converting
43 * characters from uppercase to lowercase and vice versa.
44 * <p>
45 * Character information is based on the Unicode Standard.
46 * <p>
47 * The methods and data of class {@code Character} are defined by
48 * the information in the <i>UnicodeData</i> file that is part of the
49 * Unicode Character Database maintained by the Unicode
50 * Consortium. This file specifies various properties including name
51 * and general category for every defined Unicode code point or
52 * character range.
53 * <p>
54 * The file and its description are available from the Unicode Consortium at:
55 * <ul>
56 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
57 * </ul>
58 *
59 * <h3><a name="unicode">Unicode Character Representations</a></h3>
60 *
61 * <p>The {@code char} data type (and therefore the value that a
62 * {@code Character} object encapsulates) are based on the
63 * original Unicode specification, which defined characters as
64 * fixed-width 16-bit entities. The Unicode Standard has since been
65 * changed to allow for characters whose representation requires more
66 * than 16 bits.  The range of legal <em>code point</em>s is now
67 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
68 * (Refer to the <a
69 * href="http://www.unicode.org/reports/tr27/#notation"><i>
70 * definition</i></a> of the U+<i>n</i> notation in the Unicode
71 * Standard.)
72 *
73 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
74 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
75 * <a name="supplementary">Characters</a> whose code points are greater
76 * than U+FFFF are called <em>supplementary character</em>s.  The Java
77 * platform uses the UTF-16 representation in {@code char} arrays and
78 * in the {@code String} and {@code StringBuffer} classes. In
79 * this representation, supplementary characters are represented as a pair
80 * of {@code char} values, the first from the <em>high-surrogates</em>
81 * range, (&#92;uD800-&#92;uDBFF), the second from the
82 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
83 *
84 * <p>A {@code char} value, therefore, represents Basic
85 * Multilingual Plane (BMP) code points, including the surrogate
86 * code points, or code units of the UTF-16 encoding. An
87 * {@code int} value represents all Unicode code points,
88 * including supplementary code points. The lower (least significant)
89 * 21 bits of {@code int} are used to represent Unicode code
90 * points and the upper (most significant) 11 bits must be zero.
91 * Unless otherwise specified, the behavior with respect to
92 * supplementary characters and surrogate {@code char} values is
93 * as follows:
94 *
95 * <ul>
96 * <li>The methods that only accept a {@code char} value cannot support
97 * supplementary characters. They treat {@code char} values from the
98 * surrogate ranges as undefined characters. For example,
99 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
100 * this specific value if followed by any low-surrogate value in a string
101 * would represent a letter.
102 *
103 * <li>The methods that accept an {@code int} value support all
104 * Unicode characters, including supplementary characters. For
105 * example, {@code Character.isLetter(0x2F81A)} returns
106 * {@code true} because the code point value represents a letter
107 * (a CJK ideograph).
108 * </ul>
109 *
110 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
111 * used for character values in the range between U+0000 and U+10FFFF,
112 * and <em>Unicode code unit</em> is used for 16-bit
113 * {@code char} values that are code units of the <em>UTF-16</em>
114 * encoding. For more information on Unicode terminology, refer to the
115 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
116 *
117 * @author  Lee Boynton
118 * @author  Guy Steele
119 * @author  Akira Tanaka
120 * @author  Martin Buchholz
121 * @author  Ulf Zibis
122 * @since   1.0
123 */
124public final
125class Character implements java.io.Serializable, Comparable<Character> {
126    /**
127     * The minimum radix available for conversion to and from strings.
128     * The constant value of this field is the smallest value permitted
129     * for the radix argument in radix-conversion methods such as the
130     * {@code digit} method, the {@code forDigit} method, and the
131     * {@code toString} method of class {@code Integer}.
132     *
133     * @see     Character#digit(char, int)
134     * @see     Character#forDigit(int, int)
135     * @see     Integer#toString(int, int)
136     * @see     Integer#valueOf(String)
137     */
138    public static final int MIN_RADIX = 2;
139
140    /**
141     * The maximum radix available for conversion to and from strings.
142     * The constant value of this field is the largest value permitted
143     * for the radix argument in radix-conversion methods such as the
144     * {@code digit} method, the {@code forDigit} method, and the
145     * {@code toString} method of class {@code Integer}.
146     *
147     * @see     Character#digit(char, int)
148     * @see     Character#forDigit(int, int)
149     * @see     Integer#toString(int, int)
150     * @see     Integer#valueOf(String)
151     */
152    public static final int MAX_RADIX = 36;
153
154    /**
155     * The constant value of this field is the smallest value of type
156     * {@code char}, {@code '\u005Cu0000'}.
157     *
158     * @since   1.0.2
159     */
160    public static final char MIN_VALUE = '\u0000';
161
162    /**
163     * The constant value of this field is the largest value of type
164     * {@code char}, {@code '\u005CuFFFF'}.
165     *
166     * @since   1.0.2
167     */
168    public static final char MAX_VALUE = '\uFFFF';
169
170    /**
171     * The {@code Class} instance representing the primitive type
172     * {@code char}.
173     *
174     * @since   1.1
175     */
176    @SuppressWarnings("unchecked")
177    // Android-changed: Avoid use of removed Class.getPrimitiveClass method.
178    // public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
179    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
180
181    /*
182     * Normative general types
183     */
184
185    /*
186     * General character types
187     */
188
189    /**
190     * General category "Cn" in the Unicode specification.
191     * @since   1.1
192     */
193    public static final byte UNASSIGNED = 0;
194
195    /**
196     * General category "Lu" in the Unicode specification.
197     * @since   1.1
198     */
199    public static final byte UPPERCASE_LETTER = 1;
200
201    /**
202     * General category "Ll" in the Unicode specification.
203     * @since   1.1
204     */
205    public static final byte LOWERCASE_LETTER = 2;
206
207    /**
208     * General category "Lt" in the Unicode specification.
209     * @since   1.1
210     */
211    public static final byte TITLECASE_LETTER = 3;
212
213    /**
214     * General category "Lm" in the Unicode specification.
215     * @since   1.1
216     */
217    public static final byte MODIFIER_LETTER = 4;
218
219    /**
220     * General category "Lo" in the Unicode specification.
221     * @since   1.1
222     */
223    public static final byte OTHER_LETTER = 5;
224
225    /**
226     * General category "Mn" in the Unicode specification.
227     * @since   1.1
228     */
229    public static final byte NON_SPACING_MARK = 6;
230
231    /**
232     * General category "Me" in the Unicode specification.
233     * @since   1.1
234     */
235    public static final byte ENCLOSING_MARK = 7;
236
237    /**
238     * General category "Mc" in the Unicode specification.
239     * @since   1.1
240     */
241    public static final byte COMBINING_SPACING_MARK = 8;
242
243    /**
244     * General category "Nd" in the Unicode specification.
245     * @since   1.1
246     */
247    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
248
249    /**
250     * General category "Nl" in the Unicode specification.
251     * @since   1.1
252     */
253    public static final byte LETTER_NUMBER = 10;
254
255    /**
256     * General category "No" in the Unicode specification.
257     * @since   1.1
258     */
259    public static final byte OTHER_NUMBER = 11;
260
261    /**
262     * General category "Zs" in the Unicode specification.
263     * @since   1.1
264     */
265    public static final byte SPACE_SEPARATOR = 12;
266
267    /**
268     * General category "Zl" in the Unicode specification.
269     * @since   1.1
270     */
271    public static final byte LINE_SEPARATOR = 13;
272
273    /**
274     * General category "Zp" in the Unicode specification.
275     * @since   1.1
276     */
277    public static final byte PARAGRAPH_SEPARATOR = 14;
278
279    /**
280     * General category "Cc" in the Unicode specification.
281     * @since   1.1
282     */
283    public static final byte CONTROL = 15;
284
285    /**
286     * General category "Cf" in the Unicode specification.
287     * @since   1.1
288     */
289    public static final byte FORMAT = 16;
290
291    /**
292     * General category "Co" in the Unicode specification.
293     * @since   1.1
294     */
295    public static final byte PRIVATE_USE = 18;
296
297    /**
298     * General category "Cs" in the Unicode specification.
299     * @since   1.1
300     */
301    public static final byte SURROGATE = 19;
302
303    /**
304     * General category "Pd" in the Unicode specification.
305     * @since   1.1
306     */
307    public static final byte DASH_PUNCTUATION = 20;
308
309    /**
310     * General category "Ps" in the Unicode specification.
311     * @since   1.1
312     */
313    public static final byte START_PUNCTUATION = 21;
314
315    /**
316     * General category "Pe" in the Unicode specification.
317     * @since   1.1
318     */
319    public static final byte END_PUNCTUATION = 22;
320
321    /**
322     * General category "Pc" in the Unicode specification.
323     * @since   1.1
324     */
325    public static final byte CONNECTOR_PUNCTUATION = 23;
326
327    /**
328     * General category "Po" in the Unicode specification.
329     * @since   1.1
330     */
331    public static final byte OTHER_PUNCTUATION = 24;
332
333    /**
334     * General category "Sm" in the Unicode specification.
335     * @since   1.1
336     */
337    public static final byte MATH_SYMBOL = 25;
338
339    /**
340     * General category "Sc" in the Unicode specification.
341     * @since   1.1
342     */
343    public static final byte CURRENCY_SYMBOL = 26;
344
345    /**
346     * General category "Sk" in the Unicode specification.
347     * @since   1.1
348     */
349    public static final byte MODIFIER_SYMBOL = 27;
350
351    /**
352     * General category "So" in the Unicode specification.
353     * @since   1.1
354     */
355    public static final byte OTHER_SYMBOL = 28;
356
357    /**
358     * General category "Pi" in the Unicode specification.
359     * @since   1.4
360     */
361    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
362
363    /**
364     * General category "Pf" in the Unicode specification.
365     * @since   1.4
366     */
367    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
368
369    /**
370     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
371     */
372    static final int ERROR = 0xFFFFFFFF;
373
374
375    /**
376     * Undefined bidirectional character type. Undefined {@code char}
377     * values have undefined directionality in the Unicode specification.
378     * @since 1.4
379     */
380    public static final byte DIRECTIONALITY_UNDEFINED = -1;
381
382    /**
383     * Strong bidirectional character type "L" in the Unicode specification.
384     * @since 1.4
385     */
386    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
387
388    /**
389     * Strong bidirectional character type "R" in the Unicode specification.
390     * @since 1.4
391     */
392    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
393
394    /**
395     * Strong bidirectional character type "AL" in the Unicode specification.
396     * @since 1.4
397     */
398    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
399
400    /**
401     * Weak bidirectional character type "EN" in the Unicode specification.
402     * @since 1.4
403     */
404    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
405
406    /**
407     * Weak bidirectional character type "ES" in the Unicode specification.
408     * @since 1.4
409     */
410    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
411
412    /**
413     * Weak bidirectional character type "ET" in the Unicode specification.
414     * @since 1.4
415     */
416    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
417
418    /**
419     * Weak bidirectional character type "AN" in the Unicode specification.
420     * @since 1.4
421     */
422    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
423
424    /**
425     * Weak bidirectional character type "CS" in the Unicode specification.
426     * @since 1.4
427     */
428    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
429
430    /**
431     * Weak bidirectional character type "NSM" in the Unicode specification.
432     * @since 1.4
433     */
434    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
435
436    /**
437     * Weak bidirectional character type "BN" in the Unicode specification.
438     * @since 1.4
439     */
440    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
441
442    /**
443     * Neutral bidirectional character type "B" in the Unicode specification.
444     * @since 1.4
445     */
446    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
447
448    /**
449     * Neutral bidirectional character type "S" in the Unicode specification.
450     * @since 1.4
451     */
452    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
453
454    /**
455     * Neutral bidirectional character type "WS" in the Unicode specification.
456     * @since 1.4
457     */
458    public static final byte DIRECTIONALITY_WHITESPACE = 12;
459
460    /**
461     * Neutral bidirectional character type "ON" in the Unicode specification.
462     * @since 1.4
463     */
464    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
465
466    /**
467     * Strong bidirectional character type "LRE" in the Unicode specification.
468     * @since 1.4
469     */
470    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
471
472    /**
473     * Strong bidirectional character type "LRO" in the Unicode specification.
474     * @since 1.4
475     */
476    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
477
478    /**
479     * Strong bidirectional character type "RLE" in the Unicode specification.
480     * @since 1.4
481     */
482    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
483
484    /**
485     * Strong bidirectional character type "RLO" in the Unicode specification.
486     * @since 1.4
487     */
488    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
489
490    /**
491     * Weak bidirectional character type "PDF" in the Unicode specification.
492     * @since 1.4
493     */
494    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
495
496    /**
497     * The minimum value of a
498     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
499     * Unicode high-surrogate code unit</a>
500     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
501     * A high-surrogate is also known as a <i>leading-surrogate</i>.
502     *
503     * @since 1.5
504     */
505    public static final char MIN_HIGH_SURROGATE = '\uD800';
506
507    /**
508     * The maximum value of a
509     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
510     * Unicode high-surrogate code unit</a>
511     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
512     * A high-surrogate is also known as a <i>leading-surrogate</i>.
513     *
514     * @since 1.5
515     */
516    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
517
518    /**
519     * The minimum value of a
520     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
521     * Unicode low-surrogate code unit</a>
522     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
523     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
524     *
525     * @since 1.5
526     */
527    public static final char MIN_LOW_SURROGATE  = '\uDC00';
528
529    /**
530     * The maximum value of a
531     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
532     * Unicode low-surrogate code unit</a>
533     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
534     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
535     *
536     * @since 1.5
537     */
538    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
539
540    /**
541     * The minimum value of a Unicode surrogate code unit in the
542     * UTF-16 encoding, constant {@code '\u005CuD800'}.
543     *
544     * @since 1.5
545     */
546    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
547
548    /**
549     * The maximum value of a Unicode surrogate code unit in the
550     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
551     *
552     * @since 1.5
553     */
554    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
555
556    /**
557     * The minimum value of a
558     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
559     * Unicode supplementary code point</a>, constant {@code U+10000}.
560     *
561     * @since 1.5
562     */
563    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
564
565    /**
566     * The minimum value of a
567     * <a href="http://www.unicode.org/glossary/#code_point">
568     * Unicode code point</a>, constant {@code U+0000}.
569     *
570     * @since 1.5
571     */
572    public static final int MIN_CODE_POINT = 0x000000;
573
574    /**
575     * The maximum value of a
576     * <a href="http://www.unicode.org/glossary/#code_point">
577     * Unicode code point</a>, constant {@code U+10FFFF}.
578     *
579     * @since 1.5
580     */
581    public static final int MAX_CODE_POINT = 0X10FFFF;
582
583    private static final byte[] DIRECTIONALITY = new byte[] {
584            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
585            DIRECTIONALITY_EUROPEAN_NUMBER,
586            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
587            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
588            DIRECTIONALITY_ARABIC_NUMBER,
589            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
590            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
591            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
592            DIRECTIONALITY_OTHER_NEUTRALS,
593            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
594            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
595            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
596            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
597            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
598            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
599            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
600
601    /**
602     * Instances of this class represent particular subsets of the Unicode
603     * character set.  The only family of subsets defined in the
604     * {@code Character} class is {@link Character.UnicodeBlock}.
605     * Other portions of the Java API may define other subsets for their
606     * own purposes.
607     *
608     * @since 1.2
609     */
610    public static class Subset  {
611
612        private String name;
613
614        /**
615         * Constructs a new {@code Subset} instance.
616         *
617         * @param  name  The name of this subset
618         * @exception NullPointerException if name is {@code null}
619         */
620        protected Subset(String name) {
621            if (name == null) {
622                throw new NullPointerException("name");
623            }
624            this.name = name;
625        }
626
627        /**
628         * Compares two {@code Subset} objects for equality.
629         * This method returns {@code true} if and only if
630         * {@code this} and the argument refer to the same
631         * object; since this method is {@code final}, this
632         * guarantee holds for all subclasses.
633         */
634        public final boolean equals(Object obj) {
635            return (this == obj);
636        }
637
638        /**
639         * Returns the standard hash code as defined by the
640         * {@link Object#hashCode} method.  This method
641         * is {@code final} in order to ensure that the
642         * {@code equals} and {@code hashCode} methods will
643         * be consistent in all subclasses.
644         */
645        public final int hashCode() {
646            return super.hashCode();
647        }
648
649        /**
650         * Returns the name of this subset.
651         */
652        public final String toString() {
653            return name;
654        }
655    }
656
657    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
658    // for the latest specification of Unicode Blocks.
659
660    /**
661     * A family of character subsets representing the character blocks in the
662     * Unicode specification. Character blocks generally define characters
663     * used for a specific script or purpose. A character is contained by
664     * at most one Unicode block.
665     *
666     * @since 1.2
667     */
668    public static final class UnicodeBlock extends Subset {
669
670        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
671
672        /**
673         * Creates a UnicodeBlock with the given identifier name.
674         * This name must be the same as the block identifier.
675         */
private UnicodeBlock(String idName) {
    // Delegates to the two-argument form with registration in the name map switched on.
    this(idName, /* isMap= */ true);
}
679
/**
 * Creates a UnicodeBlock with the given identifier name, optionally
 * recording it in the name lookup map.
 *
 * @param idName identifier name, handed to the {@code Subset} constructor
 * @param isMap  {@code true} to store this block in {@code map} under
 *               {@code idName}; {@code false} to leave the map untouched
 */
private UnicodeBlock(String idName, boolean isMap) {
    super(idName);
    if (!isMap) {
        return;
    }
    map.put(idName, this);
}
686
687        /**
688         * Creates a UnicodeBlock with the given identifier name and
689         * alias name.
690         */
private UnicodeBlock(String idName, String alias) {
    // Register under the identifier first ...
    this(idName);
    // ... then add the alias as a second key for this same instance.
    map.put(alias, this);
}
695
696        /**
697         * Creates a UnicodeBlock with the given identifier name and
698         * alias names.
699         */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);
    // Every alias becomes an additional key that resolves to this block.
    for (String extraName : aliases) {
        map.put(extraName, this);
    }
}
705
706        /**
707         * Constant for the "Basic Latin" Unicode character block.
708         * @since 1.2
709         */
710        public static final UnicodeBlock  BASIC_LATIN =
711            new UnicodeBlock("BASIC_LATIN",
712                             "BASIC LATIN",
713                             "BASICLATIN");
714
715        /**
716         * Constant for the "Latin-1 Supplement" Unicode character block.
717         * @since 1.2
718         */
719        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
720            new UnicodeBlock("LATIN_1_SUPPLEMENT",
721                             "LATIN-1 SUPPLEMENT",
722                             "LATIN-1SUPPLEMENT");
723
724        /**
725         * Constant for the "Latin Extended-A" Unicode character block.
726         * @since 1.2
727         */
728        public static final UnicodeBlock LATIN_EXTENDED_A =
729            new UnicodeBlock("LATIN_EXTENDED_A",
730                             "LATIN EXTENDED-A",
731                             "LATINEXTENDED-A");
732
733        /**
734         * Constant for the "Latin Extended-B" Unicode character block.
735         * @since 1.2
736         */
737        public static final UnicodeBlock LATIN_EXTENDED_B =
738            new UnicodeBlock("LATIN_EXTENDED_B",
739                             "LATIN EXTENDED-B",
740                             "LATINEXTENDED-B");
741
742        /**
743         * Constant for the "IPA Extensions" Unicode character block.
744         * @since 1.2
745         */
746        public static final UnicodeBlock IPA_EXTENSIONS =
747            new UnicodeBlock("IPA_EXTENSIONS",
748                             "IPA EXTENSIONS",
749                             "IPAEXTENSIONS");
750
751        /**
752         * Constant for the "Spacing Modifier Letters" Unicode character block.
753         * @since 1.2
754         */
755        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
756            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
757                             "SPACING MODIFIER LETTERS",
758                             "SPACINGMODIFIERLETTERS");
759
760        /**
761         * Constant for the "Combining Diacritical Marks" Unicode character block.
762         * @since 1.2
763         */
764        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
765            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
766                             "COMBINING DIACRITICAL MARKS",
767                             "COMBININGDIACRITICALMARKS");
768
769        /**
770         * Constant for the "Greek and Coptic" Unicode character block.
771         * <p>
772         * This block was previously known as the "Greek" block.
773         *
774         * @since 1.2
775         */
776        public static final UnicodeBlock GREEK =
777            new UnicodeBlock("GREEK",
778                             "GREEK AND COPTIC",
779                             "GREEKANDCOPTIC");
780
781        /**
782         * Constant for the "Cyrillic" Unicode character block.
783         * @since 1.2
784         */
785        public static final UnicodeBlock CYRILLIC =
786            new UnicodeBlock("CYRILLIC");
787
788        /**
789         * Constant for the "Armenian" Unicode character block.
790         * @since 1.2
791         */
792        public static final UnicodeBlock ARMENIAN =
793            new UnicodeBlock("ARMENIAN");
794
795        /**
796         * Constant for the "Hebrew" Unicode character block.
797         * @since 1.2
798         */
799        public static final UnicodeBlock HEBREW =
800            new UnicodeBlock("HEBREW");
801
802        /**
803         * Constant for the "Arabic" Unicode character block.
804         * @since 1.2
805         */
806        public static final UnicodeBlock ARABIC =
807            new UnicodeBlock("ARABIC");
808
809        /**
810         * Constant for the "Devanagari" Unicode character block.
811         * @since 1.2
812         */
813        public static final UnicodeBlock DEVANAGARI =
814            new UnicodeBlock("DEVANAGARI");
815
816        /**
817         * Constant for the "Bengali" Unicode character block.
818         * @since 1.2
819         */
820        public static final UnicodeBlock BENGALI =
821            new UnicodeBlock("BENGALI");
822
823        /**
824         * Constant for the "Gurmukhi" Unicode character block.
825         * @since 1.2
826         */
827        public static final UnicodeBlock GURMUKHI =
828            new UnicodeBlock("GURMUKHI");
829
830        /**
831         * Constant for the "Gujarati" Unicode character block.
832         * @since 1.2
833         */
834        public static final UnicodeBlock GUJARATI =
835            new UnicodeBlock("GUJARATI");
836
837        /**
838         * Constant for the "Oriya" Unicode character block.
839         * @since 1.2
840         */
841        public static final UnicodeBlock ORIYA =
842            new UnicodeBlock("ORIYA");
843
844        /**
845         * Constant for the "Tamil" Unicode character block.
846         * @since 1.2
847         */
848        public static final UnicodeBlock TAMIL =
849            new UnicodeBlock("TAMIL");
850
851        /**
852         * Constant for the "Telugu" Unicode character block.
853         * @since 1.2
854         */
855        public static final UnicodeBlock TELUGU =
856            new UnicodeBlock("TELUGU");
857
858        /**
859         * Constant for the "Kannada" Unicode character block.
860         * @since 1.2
861         */
862        public static final UnicodeBlock KANNADA =
863            new UnicodeBlock("KANNADA");
864
865        /**
866         * Constant for the "Malayalam" Unicode character block.
867         * @since 1.2
868         */
869        public static final UnicodeBlock MALAYALAM =
870            new UnicodeBlock("MALAYALAM");
871
872        /**
873         * Constant for the "Thai" Unicode character block.
874         * @since 1.2
875         */
876        public static final UnicodeBlock THAI =
877            new UnicodeBlock("THAI");
878
879        /**
880         * Constant for the "Lao" Unicode character block.
881         * @since 1.2
882         */
883        public static final UnicodeBlock LAO =
884            new UnicodeBlock("LAO");
885
886        /**
887         * Constant for the "Tibetan" Unicode character block.
888         * @since 1.2
889         */
890        public static final UnicodeBlock TIBETAN =
891            new UnicodeBlock("TIBETAN");
892
893        /**
894         * Constant for the "Georgian" Unicode character block.
895         * @since 1.2
896         */
897        public static final UnicodeBlock GEORGIAN =
898            new UnicodeBlock("GEORGIAN");
899
900        /**
901         * Constant for the "Hangul Jamo" Unicode character block.
902         * @since 1.2
903         */
904        public static final UnicodeBlock HANGUL_JAMO =
905            new UnicodeBlock("HANGUL_JAMO",
906                             "HANGUL JAMO",
907                             "HANGULJAMO");
908
909        /**
910         * Constant for the "Latin Extended Additional" Unicode character block.
911         * @since 1.2
912         */
913        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
914            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
915                             "LATIN EXTENDED ADDITIONAL",
916                             "LATINEXTENDEDADDITIONAL");
917
918        /**
919         * Constant for the "Greek Extended" Unicode character block.
920         * @since 1.2
921         */
922        public static final UnicodeBlock GREEK_EXTENDED =
923            new UnicodeBlock("GREEK_EXTENDED",
924                             "GREEK EXTENDED",
925                             "GREEKEXTENDED");
926
927        /**
928         * Constant for the "General Punctuation" Unicode character block.
929         * @since 1.2
930         */
931        public static final UnicodeBlock GENERAL_PUNCTUATION =
932            new UnicodeBlock("GENERAL_PUNCTUATION",
933                             "GENERAL PUNCTUATION",
934                             "GENERALPUNCTUATION");
935
936        /**
937         * Constant for the "Superscripts and Subscripts" Unicode character
938         * block.
939         * @since 1.2
940         */
941        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
942            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
943                             "SUPERSCRIPTS AND SUBSCRIPTS",
944                             "SUPERSCRIPTSANDSUBSCRIPTS");
945
946        /**
947         * Constant for the "Currency Symbols" Unicode character block.
948         * @since 1.2
949         */
950        public static final UnicodeBlock CURRENCY_SYMBOLS =
951            new UnicodeBlock("CURRENCY_SYMBOLS",
952                             "CURRENCY SYMBOLS",
953                             "CURRENCYSYMBOLS");
954
955        /**
956         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
957         * character block.
958         * <p>
959         * This block was previously known as "Combining Marks for Symbols".
960         * @since 1.2
961         */
962        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
963            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
964                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
965                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
966                             "COMBINING MARKS FOR SYMBOLS",
967                             "COMBININGMARKSFORSYMBOLS");
968
969        /**
970         * Constant for the "Letterlike Symbols" Unicode character block.
971         * @since 1.2
972         */
973        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
974            new UnicodeBlock("LETTERLIKE_SYMBOLS",
975                             "LETTERLIKE SYMBOLS",
976                             "LETTERLIKESYMBOLS");
977
978        /**
979         * Constant for the "Number Forms" Unicode character block.
980         * @since 1.2
981         */
982        public static final UnicodeBlock NUMBER_FORMS =
983            new UnicodeBlock("NUMBER_FORMS",
984                             "NUMBER FORMS",
985                             "NUMBERFORMS");
986
987        /**
988         * Constant for the "Arrows" Unicode character block.
989         * @since 1.2
990         */
991        public static final UnicodeBlock ARROWS =
992            new UnicodeBlock("ARROWS");
993
994        /**
995         * Constant for the "Mathematical Operators" Unicode character block.
996         * @since 1.2
997         */
998        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
999            new UnicodeBlock("MATHEMATICAL_OPERATORS",
1000                             "MATHEMATICAL OPERATORS",
1001                             "MATHEMATICALOPERATORS");
1002
1003        /**
1004         * Constant for the "Miscellaneous Technical" Unicode character block.
1005         * @since 1.2
1006         */
1007        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1008            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1009                             "MISCELLANEOUS TECHNICAL",
1010                             "MISCELLANEOUSTECHNICAL");
1011
1012        /**
1013         * Constant for the "Control Pictures" Unicode character block.
1014         * @since 1.2
1015         */
1016        public static final UnicodeBlock CONTROL_PICTURES =
1017            new UnicodeBlock("CONTROL_PICTURES",
1018                             "CONTROL PICTURES",
1019                             "CONTROLPICTURES");
1020
1021        /**
1022         * Constant for the "Optical Character Recognition" Unicode character block.
1023         * @since 1.2
1024         */
1025        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1026            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1027                             "OPTICAL CHARACTER RECOGNITION",
1028                             "OPTICALCHARACTERRECOGNITION");
1029
1030        /**
1031         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1032         * @since 1.2
1033         */
1034        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1035            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1036                             "ENCLOSED ALPHANUMERICS",
1037                             "ENCLOSEDALPHANUMERICS");
1038
1039        /**
1040         * Constant for the "Box Drawing" Unicode character block.
1041         * @since 1.2
1042         */
1043        public static final UnicodeBlock BOX_DRAWING =
1044            new UnicodeBlock("BOX_DRAWING",
1045                             "BOX DRAWING",
1046                             "BOXDRAWING");
1047
1048        /**
1049         * Constant for the "Block Elements" Unicode character block.
1050         * @since 1.2
1051         */
1052        public static final UnicodeBlock BLOCK_ELEMENTS =
1053            new UnicodeBlock("BLOCK_ELEMENTS",
1054                             "BLOCK ELEMENTS",
1055                             "BLOCKELEMENTS");
1056
1057        /**
1058         * Constant for the "Geometric Shapes" Unicode character block.
1059         * @since 1.2
1060         */
1061        public static final UnicodeBlock GEOMETRIC_SHAPES =
1062            new UnicodeBlock("GEOMETRIC_SHAPES",
1063                             "GEOMETRIC SHAPES",
1064                             "GEOMETRICSHAPES");
1065
1066        /**
1067         * Constant for the "Miscellaneous Symbols" Unicode character block.
1068         * @since 1.2
1069         */
1070        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1071            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1072                             "MISCELLANEOUS SYMBOLS",
1073                             "MISCELLANEOUSSYMBOLS");
1074
1075        /**
1076         * Constant for the "Dingbats" Unicode character block.
1077         * @since 1.2
1078         */
1079        public static final UnicodeBlock DINGBATS =
1080            new UnicodeBlock("DINGBATS");
1081
1082        /**
1083         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1084         * @since 1.2
1085         */
1086        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1087            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1088                             "CJK SYMBOLS AND PUNCTUATION",
1089                             "CJKSYMBOLSANDPUNCTUATION");
1090
1091        /**
1092         * Constant for the "Hiragana" Unicode character block.
1093         * @since 1.2
1094         */
1095        public static final UnicodeBlock HIRAGANA =
1096            new UnicodeBlock("HIRAGANA");
1097
1098        /**
1099         * Constant for the "Katakana" Unicode character block.
1100         * @since 1.2
1101         */
1102        public static final UnicodeBlock KATAKANA =
1103            new UnicodeBlock("KATAKANA");
1104
1105        /**
1106         * Constant for the "Bopomofo" Unicode character block.
1107         * @since 1.2
1108         */
1109        public static final UnicodeBlock BOPOMOFO =
1110            new UnicodeBlock("BOPOMOFO");
1111
1112        /**
1113         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1114         * @since 1.2
1115         */
1116        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1117            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1118                             "HANGUL COMPATIBILITY JAMO",
1119                             "HANGULCOMPATIBILITYJAMO");
1120
1121        /**
1122         * Constant for the "Kanbun" Unicode character block.
1123         * @since 1.2
1124         */
1125        public static final UnicodeBlock KANBUN =
1126            new UnicodeBlock("KANBUN");
1127
1128        /**
1129         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1130         * @since 1.2
1131         */
1132        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1133            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1134                             "ENCLOSED CJK LETTERS AND MONTHS",
1135                             "ENCLOSEDCJKLETTERSANDMONTHS");
1136
1137        /**
1138         * Constant for the "CJK Compatibility" Unicode character block.
1139         * @since 1.2
1140         */
1141        public static final UnicodeBlock CJK_COMPATIBILITY =
1142            new UnicodeBlock("CJK_COMPATIBILITY",
1143                             "CJK COMPATIBILITY",
1144                             "CJKCOMPATIBILITY");
1145
1146        /**
1147         * Constant for the "CJK Unified Ideographs" Unicode character block.
1148         * @since 1.2
1149         */
1150        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1151            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1152                             "CJK UNIFIED IDEOGRAPHS",
1153                             "CJKUNIFIEDIDEOGRAPHS");
1154
1155        /**
1156         * Constant for the "Hangul Syllables" Unicode character block.
1157         * @since 1.2
1158         */
1159        public static final UnicodeBlock HANGUL_SYLLABLES =
1160            new UnicodeBlock("HANGUL_SYLLABLES",
1161                             "HANGUL SYLLABLES",
1162                             "HANGULSYLLABLES");
1163
1164        /**
1165         * Constant for the "Private Use Area" Unicode character block.
1166         * @since 1.2
1167         */
1168        public static final UnicodeBlock PRIVATE_USE_AREA =
1169            new UnicodeBlock("PRIVATE_USE_AREA",
1170                             "PRIVATE USE AREA",
1171                             "PRIVATEUSEAREA");
1172
1173        /**
1174         * Constant for the "CJK Compatibility Ideographs" Unicode character
1175         * block.
1176         * @since 1.2
1177         */
1178        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1179            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1180                             "CJK COMPATIBILITY IDEOGRAPHS",
1181                             "CJKCOMPATIBILITYIDEOGRAPHS");
1182
1183        /**
1184         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1185         * @since 1.2
1186         */
1187        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1188            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1189                             "ALPHABETIC PRESENTATION FORMS",
1190                             "ALPHABETICPRESENTATIONFORMS");
1191
1192        /**
1193         * Constant for the "Arabic Presentation Forms-A" Unicode character
1194         * block.
1195         * @since 1.2
1196         */
1197        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1198            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1199                             "ARABIC PRESENTATION FORMS-A",
1200                             "ARABICPRESENTATIONFORMS-A");
1201
1202        /**
1203         * Constant for the "Combining Half Marks" Unicode character block.
1204         * @since 1.2
1205         */
1206        public static final UnicodeBlock COMBINING_HALF_MARKS =
1207            new UnicodeBlock("COMBINING_HALF_MARKS",
1208                             "COMBINING HALF MARKS",
1209                             "COMBININGHALFMARKS");
1210
1211        /**
1212         * Constant for the "CJK Compatibility Forms" Unicode character block.
1213         * @since 1.2
1214         */
1215        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1216            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1217                             "CJK COMPATIBILITY FORMS",
1218                             "CJKCOMPATIBILITYFORMS");
1219
1220        /**
1221         * Constant for the "Small Form Variants" Unicode character block.
1222         * @since 1.2
1223         */
1224        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1225            new UnicodeBlock("SMALL_FORM_VARIANTS",
1226                             "SMALL FORM VARIANTS",
1227                             "SMALLFORMVARIANTS");
1228
1229        /**
1230         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1231         * @since 1.2
1232         */
1233        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1234            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1235                             "ARABIC PRESENTATION FORMS-B",
1236                             "ARABICPRESENTATIONFORMS-B");
1237
1238        /**
1239         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1240         * block.
1241         * @since 1.2
1242         */
1243        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1244            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1245                             "HALFWIDTH AND FULLWIDTH FORMS",
1246                             "HALFWIDTHANDFULLWIDTHFORMS");
1247
1248        /**
1249         * Constant for the "Specials" Unicode character block.
1250         * @since 1.2
1251         */
1252        public static final UnicodeBlock SPECIALS =
1253            new UnicodeBlock("SPECIALS");
1254
1255        /**
1256         * Constant for the deprecated "Surrogates Area" character block.
1257         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1258         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1259         *             {@link #LOW_SURROGATES}, which match the block
1260         *             definitions of the Unicode Standard. {@link #of(char)}
1261         *             and {@link #of(int)} return those, not SURROGATES_AREA.
1262         */
1263        @Deprecated
1264        public static final UnicodeBlock SURROGATES_AREA =
1265            new UnicodeBlock("SURROGATES_AREA", false);
1266
1267        /**
1268         * Constant for the "Syriac" Unicode character block.
1269         * @since 1.4
1270         */
1271        public static final UnicodeBlock SYRIAC =
1272            new UnicodeBlock("SYRIAC");
1273
1274        /**
1275         * Constant for the "Thaana" Unicode character block.
1276         * @since 1.4
1277         */
1278        public static final UnicodeBlock THAANA =
1279            new UnicodeBlock("THAANA");
1280
1281        /**
1282         * Constant for the "Sinhala" Unicode character block.
1283         * @since 1.4
1284         */
1285        public static final UnicodeBlock SINHALA =
1286            new UnicodeBlock("SINHALA");
1287
1288        /**
1289         * Constant for the "Myanmar" Unicode character block.
1290         * @since 1.4
1291         */
1292        public static final UnicodeBlock MYANMAR =
1293            new UnicodeBlock("MYANMAR");
1294
1295        /**
1296         * Constant for the "Ethiopic" Unicode character block.
1297         * @since 1.4
1298         */
1299        public static final UnicodeBlock ETHIOPIC =
1300            new UnicodeBlock("ETHIOPIC");
1301
1302        /**
1303         * Constant for the "Cherokee" Unicode character block.
1304         * @since 1.4
1305         */
1306        public static final UnicodeBlock CHEROKEE =
1307            new UnicodeBlock("CHEROKEE");
1308
1309        /**
1310         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1311         * @since 1.4
1312         */
1313        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1314            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1315                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1316                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1317
1318        /**
1319         * Constant for the "Ogham" Unicode character block.
1320         * @since 1.4
1321         */
1322        public static final UnicodeBlock OGHAM =
1323            new UnicodeBlock("OGHAM");
1324
1325        /**
1326         * Constant for the "Runic" Unicode character block.
1327         * @since 1.4
1328         */
1329        public static final UnicodeBlock RUNIC =
1330            new UnicodeBlock("RUNIC");
1331
1332        /**
1333         * Constant for the "Khmer" Unicode character block.
1334         * @since 1.4
1335         */
1336        public static final UnicodeBlock KHMER =
1337            new UnicodeBlock("KHMER");
1338
1339        /**
1340         * Constant for the "Mongolian" Unicode character block.
1341         * @since 1.4
1342         */
1343        public static final UnicodeBlock MONGOLIAN =
1344            new UnicodeBlock("MONGOLIAN");
1345
1346        /**
1347         * Constant for the "Braille Patterns" Unicode character block.
1348         * @since 1.4
1349         */
1350        public static final UnicodeBlock BRAILLE_PATTERNS =
1351            new UnicodeBlock("BRAILLE_PATTERNS",
1352                             "BRAILLE PATTERNS",
1353                             "BRAILLEPATTERNS");
1354
1355        /**
1356         * Constant for the "CJK Radicals Supplement" Unicode character block.
1357         * @since 1.4
1358         */
1359        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1360            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1361                             "CJK RADICALS SUPPLEMENT",
1362                             "CJKRADICALSSUPPLEMENT");
1363
1364        /**
1365         * Constant for the "Kangxi Radicals" Unicode character block.
1366         * @since 1.4
1367         */
1368        public static final UnicodeBlock KANGXI_RADICALS =
1369            new UnicodeBlock("KANGXI_RADICALS",
1370                             "KANGXI RADICALS",
1371                             "KANGXIRADICALS");
1372
1373        /**
1374         * Constant for the "Ideographic Description Characters" Unicode character block.
1375         * @since 1.4
1376         */
1377        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1378            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1379                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1380                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1381
1382        /**
1383         * Constant for the "Bopomofo Extended" Unicode character block.
1384         * @since 1.4
1385         */
1386        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1387            new UnicodeBlock("BOPOMOFO_EXTENDED",
1388                             "BOPOMOFO EXTENDED",
1389                             "BOPOMOFOEXTENDED");
1390
1391        /**
1392         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1393         * @since 1.4
1394         */
1395        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1396            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1397                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1398                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1399
1400        /**
1401         * Constant for the "Yi Syllables" Unicode character block.
1402         * @since 1.4
1403         */
1404        public static final UnicodeBlock YI_SYLLABLES =
1405            new UnicodeBlock("YI_SYLLABLES",
1406                             "YI SYLLABLES",
1407                             "YISYLLABLES");
1408
1409        /**
1410         * Constant for the "Yi Radicals" Unicode character block.
1411         * @since 1.4
1412         */
1413        public static final UnicodeBlock YI_RADICALS =
1414            new UnicodeBlock("YI_RADICALS",
1415                             "YI RADICALS",
1416                             "YIRADICALS");
1417
1418        /**
1419         * Constant for the "Cyrillic Supplementary" (alias "Cyrillic Supplement") Unicode character block.
1420         * @since 1.5
1421         */
1422        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1423            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1424                             "CYRILLIC SUPPLEMENTARY",
1425                             "CYRILLICSUPPLEMENTARY",
1426                             "CYRILLIC SUPPLEMENT",
1427                             "CYRILLICSUPPLEMENT");
1428
1429        /**
1430         * Constant for the "Tagalog" Unicode character block.
1431         * @since 1.5
1432         */
1433        public static final UnicodeBlock TAGALOG =
1434            new UnicodeBlock("TAGALOG");
1435
1436        /**
1437         * Constant for the "Hanunoo" Unicode character block.
1438         * @since 1.5
1439         */
1440        public static final UnicodeBlock HANUNOO =
1441            new UnicodeBlock("HANUNOO");
1442
1443        /**
1444         * Constant for the "Buhid" Unicode character block.
1445         * @since 1.5
1446         */
1447        public static final UnicodeBlock BUHID =
1448            new UnicodeBlock("BUHID");
1449
1450        /**
1451         * Constant for the "Tagbanwa" Unicode character block.
1452         * @since 1.5
1453         */
1454        public static final UnicodeBlock TAGBANWA =
1455            new UnicodeBlock("TAGBANWA");
1456
1457        /**
1458         * Constant for the "Limbu" Unicode character block.
1459         * @since 1.5
1460         */
1461        public static final UnicodeBlock LIMBU =
1462            new UnicodeBlock("LIMBU");
1463
1464        /**
1465         * Constant for the "Tai Le" Unicode character block.
1466         * @since 1.5
1467         */
1468        public static final UnicodeBlock TAI_LE =
1469            new UnicodeBlock("TAI_LE",
1470                             "TAI LE",
1471                             "TAILE");
1472
1473        /**
1474         * Constant for the "Khmer Symbols" Unicode character block.
1475         * @since 1.5
1476         */
1477        public static final UnicodeBlock KHMER_SYMBOLS =
1478            new UnicodeBlock("KHMER_SYMBOLS",
1479                             "KHMER SYMBOLS",
1480                             "KHMERSYMBOLS");
1481
1482        /**
1483         * Constant for the "Phonetic Extensions" Unicode character block.
1484         * @since 1.5
1485         */
1486        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1487            new UnicodeBlock("PHONETIC_EXTENSIONS",
1488                             "PHONETIC EXTENSIONS",
1489                             "PHONETICEXTENSIONS");
1490
1491        /**
1492         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1493         * @since 1.5
1494         */
1495        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1496            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1497                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1498                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1499
1500        /**
1501         * Constant for the "Supplemental Arrows-A" Unicode character block.
1502         * @since 1.5
1503         */
1504        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1505            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1506                             "SUPPLEMENTAL ARROWS-A",
1507                             "SUPPLEMENTALARROWS-A");
1508
1509        /**
1510         * Constant for the "Supplemental Arrows-B" Unicode character block.
1511         * @since 1.5
1512         */
1513        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1514            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1515                             "SUPPLEMENTAL ARROWS-B",
1516                             "SUPPLEMENTALARROWS-B");
1517
1518        /**
1519         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1520         * character block.
1521         * @since 1.5
1522         */
1523        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1524            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1525                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1526                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1527
1528        /**
1529         * Constant for the "Supplemental Mathematical Operators" Unicode
1530         * character block.
1531         * @since 1.5
1532         */
1533        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1534            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1535                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1536                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1537
1538        /**
1539         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1540         * block.
1541         * @since 1.5
1542         */
1543        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1544            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1545                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1546                             "MISCELLANEOUSSYMBOLSANDARROWS");
1547
1548        /**
1549         * Constant for the "Katakana Phonetic Extensions" Unicode character
1550         * block.
1551         * @since 1.5
1552         */
1553        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1554            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1555                             "KATAKANA PHONETIC EXTENSIONS",
1556                             "KATAKANAPHONETICEXTENSIONS");
1557
1558        /**
1559         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1560         * @since 1.5
1561         */
1562        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1563            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1564                             "YIJING HEXAGRAM SYMBOLS",
1565                             "YIJINGHEXAGRAMSYMBOLS");
1566
1567        /**
1568         * Constant for the "Variation Selectors" Unicode character block.
1569         * @since 1.5
1570         */
1571        public static final UnicodeBlock VARIATION_SELECTORS =
1572            new UnicodeBlock("VARIATION_SELECTORS",
1573                             "VARIATION SELECTORS",
1574                             "VARIATIONSELECTORS");
1575
1576        /**
1577         * Constant for the "Linear B Syllabary" Unicode character block.
1578         * @since 1.5
1579         */
1580        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1581            new UnicodeBlock("LINEAR_B_SYLLABARY",
1582                             "LINEAR B SYLLABARY",
1583                             "LINEARBSYLLABARY");
1584
1585        /**
1586         * Constant for the "Linear B Ideograms" Unicode character block.
1587         * @since 1.5
1588         */
1589        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1590            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1591                             "LINEAR B IDEOGRAMS",
1592                             "LINEARBIDEOGRAMS");
1593
1594        /**
1595         * Constant for the "Aegean Numbers" Unicode character block.
1596         * @since 1.5
1597         */
1598        public static final UnicodeBlock AEGEAN_NUMBERS =
1599            new UnicodeBlock("AEGEAN_NUMBERS",
1600                             "AEGEAN NUMBERS",
1601                             "AEGEANNUMBERS");
1602
1603        /**
1604         * Constant for the "Old Italic" Unicode character block.
1605         * @since 1.5
1606         */
1607        public static final UnicodeBlock OLD_ITALIC =
1608            new UnicodeBlock("OLD_ITALIC",
1609                             "OLD ITALIC",
1610                             "OLDITALIC");
1611
1612        /**
1613         * Constant for the "Gothic" Unicode character block.
1614         * @since 1.5
1615         */
1616        public static final UnicodeBlock GOTHIC =
1617            new UnicodeBlock("GOTHIC");
1618
1619        /**
1620         * Constant for the "Ugaritic" Unicode character block.
1621         * @since 1.5
1622         */
1623        public static final UnicodeBlock UGARITIC =
1624            new UnicodeBlock("UGARITIC");
1625
1626        /**
1627         * Constant for the "Deseret" Unicode character block.
1628         * @since 1.5
1629         */
1630        public static final UnicodeBlock DESERET =
1631            new UnicodeBlock("DESERET");
1632
1633        /**
1634         * Constant for the "Shavian" Unicode character block.
1635         * @since 1.5
1636         */
1637        public static final UnicodeBlock SHAVIAN =
1638            new UnicodeBlock("SHAVIAN");
1639
1640        /**
1641         * Constant for the "Osmanya" Unicode character block.
1642         * @since 1.5
1643         */
1644        public static final UnicodeBlock OSMANYA =
1645            new UnicodeBlock("OSMANYA");
1646
1647        /**
1648         * Constant for the "Cypriot Syllabary" Unicode character block.
1649         * @since 1.5
1650         */
1651        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1652            new UnicodeBlock("CYPRIOT_SYLLABARY",
1653                             "CYPRIOT SYLLABARY",
1654                             "CYPRIOTSYLLABARY");
1655
1656        /**
1657         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1658         * @since 1.5
1659         */
1660        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1661            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1662                             "BYZANTINE MUSICAL SYMBOLS",
1663                             "BYZANTINEMUSICALSYMBOLS");
1664
1665        /**
1666         * Constant for the "Musical Symbols" Unicode character block.
1667         * @since 1.5
1668         */
1669        public static final UnicodeBlock MUSICAL_SYMBOLS =
1670            new UnicodeBlock("MUSICAL_SYMBOLS",
1671                             "MUSICAL SYMBOLS",
1672                             "MUSICALSYMBOLS");
1673
1674        /**
1675         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1676         * @since 1.5
1677         */
1678        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1679            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1680                             "TAI XUAN JING SYMBOLS",
1681                             "TAIXUANJINGSYMBOLS");
1682
1683        /**
1684         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1685         * character block.
1686         * @since 1.5
1687         */
1688        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1689            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1690                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1691                             "MATHEMATICALALPHANUMERICSYMBOLS");
1692
1693        /**
1694         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1695         * character block.
1696         * @since 1.5
1697         */
1698        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1699            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // identifier spelling
1700                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // block name, upper-cased
1701                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // block name, spaces removed
1702
1703        /**
1704         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1705         * @since 1.5
1706         */
1707        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1708            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",  // identifier spelling
1709                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",  // block name, upper-cased
1710                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // block name, spaces removed
1711
1712        /**
1713         * Constant for the "Tags" Unicode character block.
1714         * @since 1.5
1715         */
1716        public static final UnicodeBlock TAGS =
1717            new UnicodeBlock("TAGS");  // one-word name: only the identifier spelling is passed
1718
1719        /**
1720         * Constant for the "Variation Selectors Supplement" Unicode character
1721         * block.
1722         * @since 1.5
1723         */
1724        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1725            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",  // identifier spelling
1726                             "VARIATION SELECTORS SUPPLEMENT",  // block name, upper-cased
1727                             "VARIATIONSELECTORSSUPPLEMENT");  // block name, spaces removed
1728
1729        /**
1730         * Constant for the "Supplementary Private Use Area-A" Unicode character
1731         * block.
1732         * @since 1.5
1733         */
1734        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1735            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // identifier spelling
1736                             "SUPPLEMENTARY PRIVATE USE AREA-A",  // block name, upper-cased
1737                             "SUPPLEMENTARYPRIVATEUSEAREA-A");  // spaces removed, hyphen kept
1738
1739        /**
1740         * Constant for the "Supplementary Private Use Area-B" Unicode character
1741         * block.
1742         * @since 1.5
1743         */
1744        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1745            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",  // identifier spelling
1746                             "SUPPLEMENTARY PRIVATE USE AREA-B",  // block name, upper-cased
1747                             "SUPPLEMENTARYPRIVATEUSEAREA-B");  // spaces removed, hyphen kept
1748
1749        /**
1750         * Constant for the "High Surrogates" Unicode character block.
1751         * This block represents code point values in the high surrogate
1752         * range: U+D800 through U+DB7F.
1753         *
1754         * @since 1.5
1755         */
1756        public static final UnicodeBlock HIGH_SURROGATES =
1757            new UnicodeBlock("HIGH_SURROGATES",  // identifier spelling
1758                             "HIGH SURROGATES",  // block name, upper-cased
1759                             "HIGHSURROGATES");  // block name, spaces removed
1760
1761        /**
1762         * Constant for the "High Private Use Surrogates" Unicode character
1763         * block.
1764         * This block represents code point values in the private use high
1765         * surrogate range: U+DB80 through U+DBFF.
1766         *
1767         * @since 1.5
1768         */
1769        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1770            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",  // identifier spelling
1771                             "HIGH PRIVATE USE SURROGATES",  // block name, upper-cased
1772                             "HIGHPRIVATEUSESURROGATES");  // block name, spaces removed
1773
1774        /**
1775         * Constant for the "Low Surrogates" Unicode character block.
1776         * This block represents code point values in the low surrogate
1777         * range: U+DC00 through U+DFFF.
1778         *
1779         * @since 1.5
1780         */
1781        public static final UnicodeBlock LOW_SURROGATES =
1782            new UnicodeBlock("LOW_SURROGATES",  // identifier spelling
1783                             "LOW SURROGATES",  // block name, upper-cased
1784                             "LOWSURROGATES");  // block name, spaces removed
1785
1786        /**
1787         * Constant for the "Arabic Supplement" Unicode character block.
1788         * @since 1.7
1789         */
1790        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1791            new UnicodeBlock("ARABIC_SUPPLEMENT",  // identifier spelling
1792                             "ARABIC SUPPLEMENT",  // block name, upper-cased
1793                             "ARABICSUPPLEMENT");  // block name, spaces removed
1794
1795        /**
1796         * Constant for the "NKo" Unicode character block.
1797         * @since 1.7
1798         */
1799        public static final UnicodeBlock NKO =
1800            new UnicodeBlock("NKO");  // one-word name: only the identifier spelling is passed
1801
1802        /**
1803         * Constant for the "Samaritan" Unicode character block.
1804         * @since 1.7
1805         */
1806        public static final UnicodeBlock SAMARITAN =
1807            new UnicodeBlock("SAMARITAN");  // one-word name: only the identifier spelling is passed
1808
1809        /**
1810         * Constant for the "Mandaic" Unicode character block.
1811         * @since 1.7
1812         */
1813        public static final UnicodeBlock MANDAIC =
1814            new UnicodeBlock("MANDAIC");  // one-word name: only the identifier spelling is passed
1815
1816        /**
1817         * Constant for the "Ethiopic Supplement" Unicode character block.
1818         * @since 1.7
1819         */
1820        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1821            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",  // identifier spelling
1822                             "ETHIOPIC SUPPLEMENT",  // block name, upper-cased
1823                             "ETHIOPICSUPPLEMENT");  // block name, spaces removed
1824
1825        /**
1826         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1827         * Unicode character block.
1828         * @since 1.7
1829         */
1830        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1831            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",  // identifier spelling
1832                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",  // block name, upper-cased
1833                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // block name, spaces removed
1834
1835        /**
1836         * Constant for the "New Tai Lue" Unicode character block.
1837         * @since 1.7
1838         */
1839        public static final UnicodeBlock NEW_TAI_LUE =
1840            new UnicodeBlock("NEW_TAI_LUE",  // identifier spelling
1841                             "NEW TAI LUE",  // block name, upper-cased
1842                             "NEWTAILUE");  // block name, spaces removed
1843
1844        /**
1845         * Constant for the "Buginese" Unicode character block.
1846         * @since 1.7
1847         */
1848        public static final UnicodeBlock BUGINESE =
1849            new UnicodeBlock("BUGINESE");  // one-word name: only the identifier spelling is passed
1850
1851        /**
1852         * Constant for the "Tai Tham" Unicode character block.
1853         * @since 1.7
1854         */
1855        public static final UnicodeBlock TAI_THAM =
1856            new UnicodeBlock("TAI_THAM",  // identifier spelling
1857                             "TAI THAM",  // block name, upper-cased
1858                             "TAITHAM");  // block name, spaces removed
1859
1860        /**
1861         * Constant for the "Balinese" Unicode character block.
1862         * @since 1.7
1863         */
1864        public static final UnicodeBlock BALINESE =
1865            new UnicodeBlock("BALINESE");  // one-word name: only the identifier spelling is passed
1866
1867        /**
1868         * Constant for the "Sundanese" Unicode character block.
1869         * @since 1.7
1870         */
1871        public static final UnicodeBlock SUNDANESE =
1872            new UnicodeBlock("SUNDANESE");  // one-word name: only the identifier spelling is passed
1873
1874        /**
1875         * Constant for the "Batak" Unicode character block.
1876         * @since 1.7
1877         */
1878        public static final UnicodeBlock BATAK =
1879            new UnicodeBlock("BATAK");  // one-word name: only the identifier spelling is passed
1880
1881        /**
1882         * Constant for the "Lepcha" Unicode character block.
1883         * @since 1.7
1884         */
1885        public static final UnicodeBlock LEPCHA =
1886            new UnicodeBlock("LEPCHA");  // one-word name: only the identifier spelling is passed
1887
1888        /**
1889         * Constant for the "Ol Chiki" Unicode character block.
1890         * @since 1.7
1891         */
1892        public static final UnicodeBlock OL_CHIKI =
1893            new UnicodeBlock("OL_CHIKI",  // identifier spelling
1894                             "OL CHIKI",  // block name, upper-cased
1895                             "OLCHIKI");  // block name, spaces removed
1896
1897        /**
1898         * Constant for the "Vedic Extensions" Unicode character block.
1899         * @since 1.7
1900         */
1901        public static final UnicodeBlock VEDIC_EXTENSIONS =
1902            new UnicodeBlock("VEDIC_EXTENSIONS",  // identifier spelling
1903                             "VEDIC EXTENSIONS",  // block name, upper-cased
1904                             "VEDICEXTENSIONS");  // block name, spaces removed
1905
1906        /**
1907         * Constant for the "Phonetic Extensions Supplement" Unicode character
1908         * block.
1909         * @since 1.7
1910         */
1911        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1912            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",  // identifier spelling
1913                             "PHONETIC EXTENSIONS SUPPLEMENT",  // block name, upper-cased
1914                             "PHONETICEXTENSIONSSUPPLEMENT");  // block name, spaces removed
1915
1916        /**
1917         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1918         * character block.
1919         * @since 1.7
1920         */
1921        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1922            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",  // identifier spelling
1923                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",  // block name, upper-cased
1924                             "COMBININGDIACRITICALMARKSSUPPLEMENT");  // block name, spaces removed
1925
1926        /**
1927         * Constant for the "Glagolitic" Unicode character block.
1928         * @since 1.7
1929         */
1930        public static final UnicodeBlock GLAGOLITIC =
1931            new UnicodeBlock("GLAGOLITIC");  // one-word name: only the identifier spelling is passed
1932
1933        /**
1934         * Constant for the "Latin Extended-C" Unicode character block.
1935         * @since 1.7
1936         */
1937        public static final UnicodeBlock LATIN_EXTENDED_C =
1938            new UnicodeBlock("LATIN_EXTENDED_C",  // identifier spelling
1939                             "LATIN EXTENDED-C",  // block name, upper-cased
1940                             "LATINEXTENDED-C");  // spaces removed, hyphen kept
1941
1942        /**
1943         * Constant for the "Coptic" Unicode character block.
1944         * @since 1.7
1945         */
1946        public static final UnicodeBlock COPTIC =
1947            new UnicodeBlock("COPTIC");  // one-word name: only the identifier spelling is passed
1948
1949        /**
1950         * Constant for the "Georgian Supplement" Unicode character block.
1951         * @since 1.7
1952         */
1953        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1954            new UnicodeBlock("GEORGIAN_SUPPLEMENT",  // identifier spelling
1955                             "GEORGIAN SUPPLEMENT",  // block name, upper-cased
1956                             "GEORGIANSUPPLEMENT");  // block name, spaces removed
1957
1958        /**
1959         * Constant for the "Tifinagh" Unicode character block.
1960         * @since 1.7
1961         */
1962        public static final UnicodeBlock TIFINAGH =
1963            new UnicodeBlock("TIFINAGH");  // one-word name: only the identifier spelling is passed
1964
1965        /**
1966         * Constant for the "Ethiopic Extended" Unicode character block.
1967         * @since 1.7
1968         */
1969        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1970            new UnicodeBlock("ETHIOPIC_EXTENDED",  // identifier spelling
1971                             "ETHIOPIC EXTENDED",  // block name, upper-cased
1972                             "ETHIOPICEXTENDED");  // block name, spaces removed
1973
1974        /**
1975         * Constant for the "Cyrillic Extended-A" Unicode character block.
1976         * @since 1.7
1977         */
1978        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1979            new UnicodeBlock("CYRILLIC_EXTENDED_A",  // identifier spelling
1980                             "CYRILLIC EXTENDED-A",  // block name, upper-cased
1981                             "CYRILLICEXTENDED-A");  // spaces removed, hyphen kept
1982
1983        /**
1984         * Constant for the "Supplemental Punctuation" Unicode character block.
1985         * @since 1.7
1986         */
1987        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1988            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",  // identifier spelling
1989                             "SUPPLEMENTAL PUNCTUATION",  // block name, upper-cased
1990                             "SUPPLEMENTALPUNCTUATION");  // block name, spaces removed
1991
1992        /**
1993         * Constant for the "CJK Strokes" Unicode character block.
1994         * @since 1.7
1995         */
1996        public static final UnicodeBlock CJK_STROKES =
1997            new UnicodeBlock("CJK_STROKES",  // identifier spelling
1998                             "CJK STROKES",  // block name, upper-cased
1999                             "CJKSTROKES");  // block name, spaces removed
2000
2001        /**
2002         * Constant for the "Lisu" Unicode character block.
2003         * @since 1.7
2004         */
2005        public static final UnicodeBlock LISU =
2006            new UnicodeBlock("LISU");  // one-word name: only the identifier spelling is passed
2007
2008        /**
2009         * Constant for the "Vai" Unicode character block.
2010         * @since 1.7
2011         */
2012        public static final UnicodeBlock VAI =
2013            new UnicodeBlock("VAI");  // one-word name: only the identifier spelling is passed
2014
2015        /**
2016         * Constant for the "Cyrillic Extended-B" Unicode character block.
2017         * @since 1.7
2018         */
2019        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2020            new UnicodeBlock("CYRILLIC_EXTENDED_B",  // identifier spelling
2021                             "CYRILLIC EXTENDED-B",  // block name, upper-cased
2022                             "CYRILLICEXTENDED-B");  // spaces removed, hyphen kept
2023
2024        /**
2025         * Constant for the "Bamum" Unicode character block.
2026         * @since 1.7
2027         */
2028        public static final UnicodeBlock BAMUM =
2029            new UnicodeBlock("BAMUM");  // one-word name: only the identifier spelling is passed
2030
2031        /**
2032         * Constant for the "Modifier Tone Letters" Unicode character block.
2033         * @since 1.7
2034         */
2035        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2036            new UnicodeBlock("MODIFIER_TONE_LETTERS",  // identifier spelling
2037                             "MODIFIER TONE LETTERS",  // block name, upper-cased
2038                             "MODIFIERTONELETTERS");  // block name, spaces removed
2039
2040        /**
2041         * Constant for the "Latin Extended-D" Unicode character block.
2042         * @since 1.7
2043         */
2044        public static final UnicodeBlock LATIN_EXTENDED_D =
2045            new UnicodeBlock("LATIN_EXTENDED_D",  // identifier spelling
2046                             "LATIN EXTENDED-D",  // block name, upper-cased
2047                             "LATINEXTENDED-D");  // spaces removed, hyphen kept
2048
2049        /**
2050         * Constant for the "Syloti Nagri" Unicode character block.
2051         * @since 1.7
2052         */
2053        public static final UnicodeBlock SYLOTI_NAGRI =
2054            new UnicodeBlock("SYLOTI_NAGRI",  // identifier spelling
2055                             "SYLOTI NAGRI",  // block name, upper-cased
2056                             "SYLOTINAGRI");  // block name, spaces removed
2057
2058        /**
2059         * Constant for the "Common Indic Number Forms" Unicode character block.
2060         * @since 1.7
2061         */
2062        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2063            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",  // identifier spelling
2064                             "COMMON INDIC NUMBER FORMS",  // block name, upper-cased
2065                             "COMMONINDICNUMBERFORMS");  // block name, spaces removed
2066
2067        /**
2068         * Constant for the "Phags-pa" Unicode character block.
2069         * @since 1.7
2070         */
2071        public static final UnicodeBlock PHAGS_PA =
2072            new UnicodeBlock("PHAGS_PA",  // identifier spelling
2073                             "PHAGS-PA");  // block name, upper-cased; it contains no spaces, presumably why no third spelling is passed
2074
2075        /**
2076         * Constant for the "Saurashtra" Unicode character block.
2077         * @since 1.7
2078         */
2079        public static final UnicodeBlock SAURASHTRA =
2080            new UnicodeBlock("SAURASHTRA");  // one-word name: only the identifier spelling is passed
2081
2082        /**
2083         * Constant for the "Devanagari Extended" Unicode character block.
2084         * @since 1.7
2085         */
2086        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2087            new UnicodeBlock("DEVANAGARI_EXTENDED",  // identifier spelling
2088                             "DEVANAGARI EXTENDED",  // block name, upper-cased
2089                             "DEVANAGARIEXTENDED");  // block name, spaces removed
2090
2091        /**
2092         * Constant for the "Kayah Li" Unicode character block.
2093         * @since 1.7
2094         */
2095        public static final UnicodeBlock KAYAH_LI =
2096            new UnicodeBlock("KAYAH_LI",  // identifier spelling
2097                             "KAYAH LI",  // block name, upper-cased
2098                             "KAYAHLI");  // block name, spaces removed
2099
2100        /**
2101         * Constant for the "Rejang" Unicode character block.
2102         * @since 1.7
2103         */
2104        public static final UnicodeBlock REJANG =
2105            new UnicodeBlock("REJANG");  // one-word name: only the identifier spelling is passed
2106
2107        /**
2108         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2109         * @since 1.7
2110         */
2111        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2112            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // identifier spelling
2113                             "HANGUL JAMO EXTENDED-A",  // block name, upper-cased
2114                             "HANGULJAMOEXTENDED-A");  // spaces removed, hyphen kept
2115
2116        /**
2117         * Constant for the "Javanese" Unicode character block.
2118         * @since 1.7
2119         */
2120        public static final UnicodeBlock JAVANESE =
2121            new UnicodeBlock("JAVANESE");  // one-word name: only the identifier spelling is passed
2122
2123        /**
2124         * Constant for the "Cham" Unicode character block.
2125         * @since 1.7
2126         */
2127        public static final UnicodeBlock CHAM =
2128            new UnicodeBlock("CHAM");  // one-word name: only the identifier spelling is passed
2129
2130        /**
2131         * Constant for the "Myanmar Extended-A" Unicode character block.
2132         * @since 1.7
2133         */
2134        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2135            new UnicodeBlock("MYANMAR_EXTENDED_A",  // identifier spelling
2136                             "MYANMAR EXTENDED-A",  // block name, upper-cased
2137                             "MYANMAREXTENDED-A");  // spaces removed, hyphen kept
2138
2139        /**
2140         * Constant for the "Tai Viet" Unicode character block.
2141         * @since 1.7
2142         */
2143        public static final UnicodeBlock TAI_VIET =
2144            new UnicodeBlock("TAI_VIET",  // identifier spelling
2145                             "TAI VIET",  // block name, upper-cased
2146                             "TAIVIET");  // block name, spaces removed
2147
2148        /**
2149         * Constant for the "Ethiopic Extended-A" Unicode character block.
2150         * @since 1.7
2151         */
2152        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2153            new UnicodeBlock("ETHIOPIC_EXTENDED_A",  // identifier spelling
2154                             "ETHIOPIC EXTENDED-A",  // block name, upper-cased
2155                             "ETHIOPICEXTENDED-A");  // spaces removed, hyphen kept
2156
2157        /**
2158         * Constant for the "Meetei Mayek" Unicode character block.
2159         * @since 1.7
2160         */
2161        public static final UnicodeBlock MEETEI_MAYEK =
2162            new UnicodeBlock("MEETEI_MAYEK",  // identifier spelling
2163                             "MEETEI MAYEK",  // block name, upper-cased
2164                             "MEETEIMAYEK");  // block name, spaces removed
2165
2166        /**
2167         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2168         * @since 1.7
2169         */
2170        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2171            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",  // identifier spelling
2172                             "HANGUL JAMO EXTENDED-B",  // block name, upper-cased
2173                             "HANGULJAMOEXTENDED-B");  // spaces removed, hyphen kept
2174
2175        /**
2176         * Constant for the "Vertical Forms" Unicode character block.
2177         * @since 1.7
2178         */
2179        public static final UnicodeBlock VERTICAL_FORMS =
2180            new UnicodeBlock("VERTICAL_FORMS",  // identifier spelling
2181                             "VERTICAL FORMS",  // block name, upper-cased
2182                             "VERTICALFORMS");  // block name, spaces removed
2183
2184        /**
2185         * Constant for the "Ancient Greek Numbers" Unicode character block.
2186         * @since 1.7
2187         */
2188        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2189            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // identifier spelling
2190                             "ANCIENT GREEK NUMBERS",  // block name, upper-cased
2191                             "ANCIENTGREEKNUMBERS");  // block name, spaces removed
2192
2193        /**
2194         * Constant for the "Ancient Symbols" Unicode character block.
2195         * @since 1.7
2196         */
2197        public static final UnicodeBlock ANCIENT_SYMBOLS =
2198            new UnicodeBlock("ANCIENT_SYMBOLS",  // identifier spelling
2199                             "ANCIENT SYMBOLS",  // block name, upper-cased
2200                             "ANCIENTSYMBOLS");  // block name, spaces removed
2201
2202        /**
2203         * Constant for the "Phaistos Disc" Unicode character block.
2204         * @since 1.7
2205         */
2206        public static final UnicodeBlock PHAISTOS_DISC =
2207            new UnicodeBlock("PHAISTOS_DISC",  // identifier spelling
2208                             "PHAISTOS DISC",  // block name, upper-cased
2209                             "PHAISTOSDISC");  // block name, spaces removed
2210
2211        /**
2212         * Constant for the "Lycian" Unicode character block.
2213         * @since 1.7
2214         */
2215        public static final UnicodeBlock LYCIAN =
2216            new UnicodeBlock("LYCIAN");  // one-word name: only the identifier spelling is passed
2217
2218        /**
2219         * Constant for the "Carian" Unicode character block.
2220         * @since 1.7
2221         */
2222        public static final UnicodeBlock CARIAN =
2223            new UnicodeBlock("CARIAN");  // one-word name: only the identifier spelling is passed
2224
2225        /**
2226         * Constant for the "Old Persian" Unicode character block.
2227         * @since 1.7
2228         */
2229        public static final UnicodeBlock OLD_PERSIAN =
2230            new UnicodeBlock("OLD_PERSIAN",  // identifier spelling
2231                             "OLD PERSIAN",  // block name, upper-cased
2232                             "OLDPERSIAN");  // block name, spaces removed
2233
2234        /**
2235         * Constant for the "Imperial Aramaic" Unicode character block.
2236         * @since 1.7
2237         */
2238        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2239            new UnicodeBlock("IMPERIAL_ARAMAIC",  // identifier spelling
2240                             "IMPERIAL ARAMAIC",  // block name, upper-cased
2241                             "IMPERIALARAMAIC");  // block name, spaces removed
2242
2243        /**
2244         * Constant for the "Phoenician" Unicode character block.
2245         * @since 1.7
2246         */
2247        public static final UnicodeBlock PHOENICIAN =
2248            new UnicodeBlock("PHOENICIAN");  // one-word name: only the identifier spelling is passed
2249
2250        /**
2251         * Constant for the "Lydian" Unicode character block.
2252         * @since 1.7
2253         */
2254        public static final UnicodeBlock LYDIAN =
2255            new UnicodeBlock("LYDIAN");  // one-word name: only the identifier spelling is passed
2256
2257        /**
2258         * Constant for the "Kharoshthi" Unicode character block.
2259         * @since 1.7
2260         */
2261        public static final UnicodeBlock KHAROSHTHI =
2262            new UnicodeBlock("KHAROSHTHI");  // one-word name: only the identifier spelling is passed
2263
2264        /**
2265         * Constant for the "Old South Arabian" Unicode character block.
2266         * @since 1.7
2267         */
2268        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2269            new UnicodeBlock("OLD_SOUTH_ARABIAN",  // identifier spelling
2270                             "OLD SOUTH ARABIAN",  // block name, upper-cased
2271                             "OLDSOUTHARABIAN");  // block name, spaces removed
2272
2273        /**
2274         * Constant for the "Avestan" Unicode character block.
2275         * @since 1.7
2276         */
2277        public static final UnicodeBlock AVESTAN =
2278            new UnicodeBlock("AVESTAN");  // one-word name: only the identifier spelling is passed
2279
2280        /**
2281         * Constant for the "Inscriptional Parthian" Unicode character block.
2282         * @since 1.7
2283         */
2284        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2285            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",  // identifier spelling
2286                             "INSCRIPTIONAL PARTHIAN",  // block name, upper-cased
2287                             "INSCRIPTIONALPARTHIAN");  // block name, spaces removed
2288
2289        /**
2290         * Constant for the "Inscriptional Pahlavi" Unicode character block.
2291         * @since 1.7
2292         */
2293        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2294            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",  // identifier spelling
2295                             "INSCRIPTIONAL PAHLAVI",  // block name, upper-cased
2296                             "INSCRIPTIONALPAHLAVI");  // block name, spaces removed
2297
2298        /**
2299         * Constant for the "Old Turkic" Unicode character block.
2300         * @since 1.7
2301         */
2302        public static final UnicodeBlock OLD_TURKIC =
2303            new UnicodeBlock("OLD_TURKIC",  // identifier spelling
2304                             "OLD TURKIC",  // block name, upper-cased
2305                             "OLDTURKIC");  // block name, spaces removed
2306
2307        /**
2308         * Constant for the "Rumi Numeral Symbols" Unicode character block.
2309         * @since 1.7
2310         */
2311        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2312            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",  // identifier spelling
2313                             "RUMI NUMERAL SYMBOLS",  // block name, upper-cased
2314                             "RUMINUMERALSYMBOLS");  // block name, spaces removed
2315
2316        /**
2317         * Constant for the "Brahmi" Unicode character block.
2318         * @since 1.7
2319         */
2320        public static final UnicodeBlock BRAHMI =
2321            new UnicodeBlock("BRAHMI");  // one-word name: only the identifier spelling is passed
2322
2323        /**
2324         * Constant for the "Kaithi" Unicode character block.
2325         * @since 1.7
2326         */
2327        public static final UnicodeBlock KAITHI =
2328            new UnicodeBlock("KAITHI");  // one-word name: only the identifier spelling is passed
2329
2330        /**
2331         * Constant for the "Cuneiform" Unicode character block.
2332         * @since 1.7
2333         */
2334        public static final UnicodeBlock CUNEIFORM =
2335            new UnicodeBlock("CUNEIFORM");  // one-word name: only the identifier spelling is passed
2336
2337        /**
2338         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2339         * character block.
2340         * @since 1.7
2341         */
2342        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2343            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",  // identifier spelling
2344                             "CUNEIFORM NUMBERS AND PUNCTUATION",  // block name, upper-cased
2345                             "CUNEIFORMNUMBERSANDPUNCTUATION");  // block name, spaces removed
2346
2347        /**
2348         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2349         * @since 1.7
2350         */
2351        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2352            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",  // identifier spelling
2353                             "EGYPTIAN HIEROGLYPHS",  // block name, upper-cased
2354                             "EGYPTIANHIEROGLYPHS");  // block name, spaces removed
2355
2356        /**
2357         * Constant for the "Bamum Supplement" Unicode character block.
2358         * @since 1.7
2359         */
2360        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2361            new UnicodeBlock("BAMUM_SUPPLEMENT",  // identifier spelling
2362                             "BAMUM SUPPLEMENT",  // block name, upper-cased
2363                             "BAMUMSUPPLEMENT");  // block name, spaces removed
2364
2365        /**
2366         * Constant for the "Kana Supplement" Unicode character block.
2367         * @since 1.7
2368         */
2369        public static final UnicodeBlock KANA_SUPPLEMENT =
2370            new UnicodeBlock("KANA_SUPPLEMENT",  // identifier spelling
2371                             "KANA SUPPLEMENT",  // block name, upper-cased
2372                             "KANASUPPLEMENT");  // block name, spaces removed
2373
2374        /**
2375         * Constant for the "Ancient Greek Musical Notation" Unicode character
2376         * block.
2377         * @since 1.7
2378         */
2379        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2380            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",  // identifier spelling
2381                             "ANCIENT GREEK MUSICAL NOTATION",  // block name, upper-cased
2382                             "ANCIENTGREEKMUSICALNOTATION");  // block name, spaces removed
2383
2384        /**
2385         * Constant for the "Counting Rod Numerals" Unicode character block.
2386         * @since 1.7
2387         */
2388        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2389            new UnicodeBlock("COUNTING_ROD_NUMERALS",  // identifier spelling
2390                             "COUNTING ROD NUMERALS",  // block name, upper-cased
2391                             "COUNTINGRODNUMERALS");  // block name, spaces removed
2392
2393        /**
2394         * Constant for the "Mahjong Tiles" Unicode character block.
2395         * @since 1.7
2396         */
2397        public static final UnicodeBlock MAHJONG_TILES =
2398            new UnicodeBlock("MAHJONG_TILES",  // identifier spelling
2399                             "MAHJONG TILES",  // block name, upper-cased
2400                             "MAHJONGTILES");  // block name, spaces removed
2401
2402        /**
2403         * Constant for the "Domino Tiles" Unicode character block.
2404         * @since 1.7
2405         */
2406        public static final UnicodeBlock DOMINO_TILES =
2407            new UnicodeBlock("DOMINO_TILES",  // identifier spelling
2408                             "DOMINO TILES",  // block name, upper-cased
2409                             "DOMINOTILES");  // block name, spaces removed
2410
2411        /**
2412         * Constant for the "Playing Cards" Unicode character block.
2413         * @since 1.7
2414         */
2415        public static final UnicodeBlock PLAYING_CARDS =
2416            new UnicodeBlock("PLAYING_CARDS",  // identifier spelling
2417                             "PLAYING CARDS",  // block name, upper-cased
2418                             "PLAYINGCARDS");  // block name, spaces removed
2419
2420        /**
2421         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2422         * block.
2423         * @since 1.7
2424         */
2425        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2426            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",  // identifier spelling
2427                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",  // block name, upper-cased
2428                             "ENCLOSEDALPHANUMERICSUPPLEMENT");  // block name, spaces removed
2429
2430        /**
2431         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2432         * block.
2433         * @since 1.7
2434         */
2435        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2436            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",  // identifier spelling
2437                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",  // block name, upper-cased
2438                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");  // block name, spaces removed
2439
2440        /**
2441         * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2442         * character block.
2443         * @since 1.7
2444         */
2445        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2446            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",  // identifier spelling
2447                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",  // block name, upper-cased
2448                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");  // block name, spaces removed
2449
2450        /**
2451         * Constant for the "Emoticons" Unicode character block.
2452         * @since 1.7
2453         */
2454        public static final UnicodeBlock EMOTICONS =
2455            new UnicodeBlock("EMOTICONS");  // one-word name: only the identifier spelling is passed
2456
2457        /**
2458         * Constant for the "Transport and Map Symbols" Unicode character block.
2459         * @since 1.7
2460         */
2461        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2462            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",  // identifier spelling
2463                             "TRANSPORT AND MAP SYMBOLS",  // block name, upper-cased
2464                             "TRANSPORTANDMAPSYMBOLS");  // block name, spaces removed
2465
2466        /**
2467         * Constant for the "Alchemical Symbols" Unicode character block.
2468         * @since 1.7
2469         */
2470        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2471            new UnicodeBlock("ALCHEMICAL_SYMBOLS",  // identifier spelling
2472                             "ALCHEMICAL SYMBOLS",  // block name, upper-cased
2473                             "ALCHEMICALSYMBOLS");  // block name, spaces removed
2474
2475        /**
2476         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2477         * character block.
2478         * @since 1.7
2479         */
2480        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2481            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",  // identifier spelling
2482                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",  // block name, upper-cased
2483                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");  // block name, spaces removed
2484
2485        /**
2486         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2487         * character block.
2488         * @since 1.7
2489         */
2490        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2491            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",  // identifier spelling
2492                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",  // block name, upper-cased
2493                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");  // block name, spaces removed
2494
2495        /**
2496         * Constant for the "Arabic Extended-A" Unicode character block.
2497         * @since 1.8
2498         */
2499        public static final UnicodeBlock ARABIC_EXTENDED_A =
2500            new UnicodeBlock("ARABIC_EXTENDED_A",  // identifier spelling
2501                             "ARABIC EXTENDED-A",  // block name, upper-cased
2502                             "ARABICEXTENDED-A");  // spaces removed, hyphen kept
2503
2504        /**
2505         * Constant for the "Sundanese Supplement" Unicode character block.
2506         * @since 1.8
2507         */
2508        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2509            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2510                             "SUNDANESE SUPPLEMENT",
2511                             "SUNDANESESUPPLEMENT");
2512
2513        /**
2514         * Constant for the "Meetei Mayek Extensions" Unicode character block.
2515         * @since 1.8
2516         */
2517        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2518            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2519                             "MEETEI MAYEK EXTENSIONS",
2520                             "MEETEIMAYEKEXTENSIONS");
2521
2522        /**
2523         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2524         * @since 1.8
2525         */
2526        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2527            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2528                             "MEROITIC HIEROGLYPHS",
2529                             "MEROITICHIEROGLYPHS");
2530
2531        /**
2532         * Constant for the "Meroitic Cursive" Unicode character block.
2533         * @since 1.8
2534         */
2535        public static final UnicodeBlock MEROITIC_CURSIVE =
2536            new UnicodeBlock("MEROITIC_CURSIVE",
2537                             "MEROITIC CURSIVE",
2538                             "MEROITICCURSIVE");
2539
2540        /**
2541         * Constant for the "Sora Sompeng" Unicode character block.
2542         * @since 1.8
2543         */
2544        public static final UnicodeBlock SORA_SOMPENG =
2545            new UnicodeBlock("SORA_SOMPENG",
2546                             "SORA SOMPENG",
2547                             "SORASOMPENG");
2548
2549        /**
2550         * Constant for the "Chakma" Unicode character block.
2551         * @since 1.8
2552         */
2553        public static final UnicodeBlock CHAKMA =
2554            new UnicodeBlock("CHAKMA");
2555
2556        /**
2557         * Constant for the "Sharada" Unicode character block.
2558         * @since 1.8
2559         */
2560        public static final UnicodeBlock SHARADA =
2561            new UnicodeBlock("SHARADA");
2562
2563        /**
2564         * Constant for the "Takri" Unicode character block.
2565         * @since 1.8
2566         */
2567        public static final UnicodeBlock TAKRI =
2568            new UnicodeBlock("TAKRI");
2569
2570        /**
2571         * Constant for the "Miao" Unicode character block.
2572         * @since 1.8
2573         */
2574        public static final UnicodeBlock MIAO =
2575            new UnicodeBlock("MIAO");
2576
2577        /**
2578         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2579         * character block.
2580         * @since 1.8
2581         */
2582        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2583            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2584                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2585                             "ARABICMATHEMATICALALPHABETICSYMBOLS");
2586
2587        private static final int blockStarts[] = {
2588            0x0000,   // 0000..007F; Basic Latin
2589            0x0080,   // 0080..00FF; Latin-1 Supplement
2590            0x0100,   // 0100..017F; Latin Extended-A
2591            0x0180,   // 0180..024F; Latin Extended-B
2592            0x0250,   // 0250..02AF; IPA Extensions
2593            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2594            0x0300,   // 0300..036F; Combining Diacritical Marks
2595            0x0370,   // 0370..03FF; Greek and Coptic
2596            0x0400,   // 0400..04FF; Cyrillic
2597            0x0500,   // 0500..052F; Cyrillic Supplement
2598            0x0530,   // 0530..058F; Armenian
2599            0x0590,   // 0590..05FF; Hebrew
2600            0x0600,   // 0600..06FF; Arabic
2601            0x0700,   // 0700..074F; Syriac
2602            0x0750,   // 0750..077F; Arabic Supplement
2603            0x0780,   // 0780..07BF; Thaana
2604            0x07C0,   // 07C0..07FF; NKo
2605            0x0800,   // 0800..083F; Samaritan
2606            0x0840,   // 0840..085F; Mandaic
2607            0x0860,   //             unassigned
2608            0x08A0,   // 08A0..08FF; Arabic Extended-A
2609            0x0900,   // 0900..097F; Devanagari
2610            0x0980,   // 0980..09FF; Bengali
2611            0x0A00,   // 0A00..0A7F; Gurmukhi
2612            0x0A80,   // 0A80..0AFF; Gujarati
2613            0x0B00,   // 0B00..0B7F; Oriya
2614            0x0B80,   // 0B80..0BFF; Tamil
2615            0x0C00,   // 0C00..0C7F; Telugu
2616            0x0C80,   // 0C80..0CFF; Kannada
2617            0x0D00,   // 0D00..0D7F; Malayalam
2618            0x0D80,   // 0D80..0DFF; Sinhala
2619            0x0E00,   // 0E00..0E7F; Thai
2620            0x0E80,   // 0E80..0EFF; Lao
2621            0x0F00,   // 0F00..0FFF; Tibetan
2622            0x1000,   // 1000..109F; Myanmar
2623            0x10A0,   // 10A0..10FF; Georgian
2624            0x1100,   // 1100..11FF; Hangul Jamo
2625            0x1200,   // 1200..137F; Ethiopic
2626            0x1380,   // 1380..139F; Ethiopic Supplement
2627            0x13A0,   // 13A0..13FF; Cherokee
2628            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2629            0x1680,   // 1680..169F; Ogham
2630            0x16A0,   // 16A0..16FF; Runic
2631            0x1700,   // 1700..171F; Tagalog
2632            0x1720,   // 1720..173F; Hanunoo
2633            0x1740,   // 1740..175F; Buhid
2634            0x1760,   // 1760..177F; Tagbanwa
2635            0x1780,   // 1780..17FF; Khmer
2636            0x1800,   // 1800..18AF; Mongolian
2637            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2638            0x1900,   // 1900..194F; Limbu
2639            0x1950,   // 1950..197F; Tai Le
2640            0x1980,   // 1980..19DF; New Tai Lue
2641            0x19E0,   // 19E0..19FF; Khmer Symbols
2642            0x1A00,   // 1A00..1A1F; Buginese
2643            0x1A20,   // 1A20..1AAF; Tai Tham
2644            0x1AB0,   //             unassigned
2645            0x1B00,   // 1B00..1B7F; Balinese
2646            0x1B80,   // 1B80..1BBF; Sundanese
2647            0x1BC0,   // 1BC0..1BFF; Batak
2648            0x1C00,   // 1C00..1C4F; Lepcha
2649            0x1C50,   // 1C50..1C7F; Ol Chiki
2650            0x1C80,   //             unassigned
2651            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2652            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2653            0x1D00,   // 1D00..1D7F; Phonetic Extensions
2654            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2655            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2656            0x1E00,   // 1E00..1EFF; Latin Extended Additional
2657            0x1F00,   // 1F00..1FFF; Greek Extended
2658            0x2000,   // 2000..206F; General Punctuation
2659            0x2070,   // 2070..209F; Superscripts and Subscripts
2660            0x20A0,   // 20A0..20CF; Currency Symbols
2661            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2662            0x2100,   // 2100..214F; Letterlike Symbols
2663            0x2150,   // 2150..218F; Number Forms
2664            0x2190,   // 2190..21FF; Arrows
2665            0x2200,   // 2200..22FF; Mathematical Operators
2666            0x2300,   // 2300..23FF; Miscellaneous Technical
2667            0x2400,   // 2400..243F; Control Pictures
2668            0x2440,   // 2440..245F; Optical Character Recognition
2669            0x2460,   // 2460..24FF; Enclosed Alphanumerics
2670            0x2500,   // 2500..257F; Box Drawing
2671            0x2580,   // 2580..259F; Block Elements
2672            0x25A0,   // 25A0..25FF; Geometric Shapes
2673            0x2600,   // 2600..26FF; Miscellaneous Symbols
2674            0x2700,   // 2700..27BF; Dingbats
2675            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2676            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2677            0x2800,   // 2800..28FF; Braille Patterns
2678            0x2900,   // 2900..297F; Supplemental Arrows-B
2679            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2680            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2681            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2682            0x2C00,   // 2C00..2C5F; Glagolitic
2683            0x2C60,   // 2C60..2C7F; Latin Extended-C
2684            0x2C80,   // 2C80..2CFF; Coptic
2685            0x2D00,   // 2D00..2D2F; Georgian Supplement
2686            0x2D30,   // 2D30..2D7F; Tifinagh
2687            0x2D80,   // 2D80..2DDF; Ethiopic Extended
2688            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2689            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2690            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2691            0x2F00,   // 2F00..2FDF; Kangxi Radicals
2692            0x2FE0,   //             unassigned
2693            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2694            0x3000,   // 3000..303F; CJK Symbols and Punctuation
2695            0x3040,   // 3040..309F; Hiragana
2696            0x30A0,   // 30A0..30FF; Katakana
2697            0x3100,   // 3100..312F; Bopomofo
2698            0x3130,   // 3130..318F; Hangul Compatibility Jamo
2699            0x3190,   // 3190..319F; Kanbun
2700            0x31A0,   // 31A0..31BF; Bopomofo Extended
2701            0x31C0,   // 31C0..31EF; CJK Strokes
2702            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2703            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2704            0x3300,   // 3300..33FF; CJK Compatibility
2705            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2706            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2707            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2708            0xA000,   // A000..A48F; Yi Syllables
2709            0xA490,   // A490..A4CF; Yi Radicals
2710            0xA4D0,   // A4D0..A4FF; Lisu
2711            0xA500,   // A500..A63F; Vai
2712            0xA640,   // A640..A69F; Cyrillic Extended-B
2713            0xA6A0,   // A6A0..A6FF; Bamum
2714            0xA700,   // A700..A71F; Modifier Tone Letters
2715            0xA720,   // A720..A7FF; Latin Extended-D
2716            0xA800,   // A800..A82F; Syloti Nagri
2717            0xA830,   // A830..A83F; Common Indic Number Forms
2718            0xA840,   // A840..A87F; Phags-pa
2719            0xA880,   // A880..A8DF; Saurashtra
2720            0xA8E0,   // A8E0..A8FF; Devanagari Extended
2721            0xA900,   // A900..A92F; Kayah Li
2722            0xA930,   // A930..A95F; Rejang
2723            0xA960,   // A960..A97F; Hangul Jamo Extended-A
2724            0xA980,   // A980..A9DF; Javanese
2725            0xA9E0,   //             unassigned
2726            0xAA00,   // AA00..AA5F; Cham
2727            0xAA60,   // AA60..AA7F; Myanmar Extended-A
2728            0xAA80,   // AA80..AADF; Tai Viet
2729            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2730            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2731            0xAB30,   //             unassigned
2732            0xABC0,   // ABC0..ABFF; Meetei Mayek
2733            0xAC00,   // AC00..D7AF; Hangul Syllables
2734            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2735            0xD800,   // D800..DB7F; High Surrogates
2736            0xDB80,   // DB80..DBFF; High Private Use Surrogates
2737            0xDC00,   // DC00..DFFF; Low Surrogates
2738            0xE000,   // E000..F8FF; Private Use Area
2739            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2740            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2741            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2742            0xFE00,   // FE00..FE0F; Variation Selectors
2743            0xFE10,   // FE10..FE1F; Vertical Forms
2744            0xFE20,   // FE20..FE2F; Combining Half Marks
2745            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2746            0xFE50,   // FE50..FE6F; Small Form Variants
2747            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2748            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2749            0xFFF0,   // FFF0..FFFF; Specials
2750            0x10000,  // 10000..1007F; Linear B Syllabary
2751            0x10080,  // 10080..100FF; Linear B Ideograms
2752            0x10100,  // 10100..1013F; Aegean Numbers
2753            0x10140,  // 10140..1018F; Ancient Greek Numbers
2754            0x10190,  // 10190..101CF; Ancient Symbols
2755            0x101D0,  // 101D0..101FF; Phaistos Disc
2756            0x10200,  //               unassigned
2757            0x10280,  // 10280..1029F; Lycian
2758            0x102A0,  // 102A0..102DF; Carian
2759            0x102E0,  //               unassigned
2760            0x10300,  // 10300..1032F; Old Italic
2761            0x10330,  // 10330..1034F; Gothic
2762            0x10350,  //               unassigned
2763            0x10380,  // 10380..1039F; Ugaritic
2764            0x103A0,  // 103A0..103DF; Old Persian
2765            0x103E0,  //               unassigned
2766            0x10400,  // 10400..1044F; Deseret
2767            0x10450,  // 10450..1047F; Shavian
2768            0x10480,  // 10480..104AF; Osmanya
2769            0x104B0,  //               unassigned
2770            0x10800,  // 10800..1083F; Cypriot Syllabary
2771            0x10840,  // 10840..1085F; Imperial Aramaic
2772            0x10860,  //               unassigned
2773            0x10900,  // 10900..1091F; Phoenician
2774            0x10920,  // 10920..1093F; Lydian
2775            0x10940,  //               unassigned
2776            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2777            0x109A0,  // 109A0..109FF; Meroitic Cursive
2778            0x10A00,  // 10A00..10A5F; Kharoshthi
2779            0x10A60,  // 10A60..10A7F; Old South Arabian
2780            0x10A80,  //               unassigned
2781            0x10B00,  // 10B00..10B3F; Avestan
2782            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2783            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2784            0x10B80,  //               unassigned
2785            0x10C00,  // 10C00..10C4F; Old Turkic
2786            0x10C50,  //               unassigned
2787            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2788            0x10E80,  //               unassigned
2789            0x11000,  // 11000..1107F; Brahmi
2790            0x11080,  // 11080..110CF; Kaithi
2791            0x110D0,  // 110D0..110FF; Sora Sompeng
2792            0x11100,  // 11100..1114F; Chakma
2793            0x11150,  //               unassigned
2794            0x11180,  // 11180..111DF; Sharada
2795            0x111E0,  //               unassigned
2796            0x11680,  // 11680..116CF; Takri
2797            0x116D0,  //               unassigned
2798            0x12000,  // 12000..123FF; Cuneiform
2799            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2800            0x12480,  //               unassigned
2801            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2802            0x13430,  //               unassigned
2803            0x16800,  // 16800..16A3F; Bamum Supplement
2804            0x16A40,  //               unassigned
2805            0x16F00,  // 16F00..16F9F; Miao
2806            0x16FA0,  //               unassigned
2807            0x1B000,  // 1B000..1B0FF; Kana Supplement
2808            0x1B100,  //               unassigned
2809            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2810            0x1D100,  // 1D100..1D1FF; Musical Symbols
2811            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2812            0x1D250,  //               unassigned
2813            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2814            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2815            0x1D380,  //               unassigned
2816            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2817            0x1D800,  //               unassigned
2818            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2819            0x1EF00,  //               unassigned
2820            0x1F000,  // 1F000..1F02F; Mahjong Tiles
2821            0x1F030,  // 1F030..1F09F; Domino Tiles
2822            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2823            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2824            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2825            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2826            0x1F600,  // 1F600..1F64F; Emoticons
2827            0x1F650,  //               unassigned
2828            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2829            0x1F700,  // 1F700..1F77F; Alchemical Symbols
2830            0x1F780,  //               unassigned
2831            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2832            0x2A6E0,  //               unassigned
2833            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2834            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2835            0x2B820,  //               unassigned
2836            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2837            0x2FA20,  //               unassigned
2838            0xE0000,  // E0000..E007F; Tags
2839            0xE0080,  //               unassigned
2840            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2841            0xE01F0,  //               unassigned
2842            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2843            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2844        };
2845
2846        private static final UnicodeBlock[] blocks = {
2847            BASIC_LATIN,
2848            LATIN_1_SUPPLEMENT,
2849            LATIN_EXTENDED_A,
2850            LATIN_EXTENDED_B,
2851            IPA_EXTENSIONS,
2852            SPACING_MODIFIER_LETTERS,
2853            COMBINING_DIACRITICAL_MARKS,
2854            GREEK,
2855            CYRILLIC,
2856            CYRILLIC_SUPPLEMENTARY,
2857            ARMENIAN,
2858            HEBREW,
2859            ARABIC,
2860            SYRIAC,
2861            ARABIC_SUPPLEMENT,
2862            THAANA,
2863            NKO,
2864            SAMARITAN,
2865            MANDAIC,
2866            null,
2867            ARABIC_EXTENDED_A,
2868            DEVANAGARI,
2869            BENGALI,
2870            GURMUKHI,
2871            GUJARATI,
2872            ORIYA,
2873            TAMIL,
2874            TELUGU,
2875            KANNADA,
2876            MALAYALAM,
2877            SINHALA,
2878            THAI,
2879            LAO,
2880            TIBETAN,
2881            MYANMAR,
2882            GEORGIAN,
2883            HANGUL_JAMO,
2884            ETHIOPIC,
2885            ETHIOPIC_SUPPLEMENT,
2886            CHEROKEE,
2887            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2888            OGHAM,
2889            RUNIC,
2890            TAGALOG,
2891            HANUNOO,
2892            BUHID,
2893            TAGBANWA,
2894            KHMER,
2895            MONGOLIAN,
2896            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2897            LIMBU,
2898            TAI_LE,
2899            NEW_TAI_LUE,
2900            KHMER_SYMBOLS,
2901            BUGINESE,
2902            TAI_THAM,
2903            null,
2904            BALINESE,
2905            SUNDANESE,
2906            BATAK,
2907            LEPCHA,
2908            OL_CHIKI,
2909            null,
2910            SUNDANESE_SUPPLEMENT,
2911            VEDIC_EXTENSIONS,
2912            PHONETIC_EXTENSIONS,
2913            PHONETIC_EXTENSIONS_SUPPLEMENT,
2914            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2915            LATIN_EXTENDED_ADDITIONAL,
2916            GREEK_EXTENDED,
2917            GENERAL_PUNCTUATION,
2918            SUPERSCRIPTS_AND_SUBSCRIPTS,
2919            CURRENCY_SYMBOLS,
2920            COMBINING_MARKS_FOR_SYMBOLS,
2921            LETTERLIKE_SYMBOLS,
2922            NUMBER_FORMS,
2923            ARROWS,
2924            MATHEMATICAL_OPERATORS,
2925            MISCELLANEOUS_TECHNICAL,
2926            CONTROL_PICTURES,
2927            OPTICAL_CHARACTER_RECOGNITION,
2928            ENCLOSED_ALPHANUMERICS,
2929            BOX_DRAWING,
2930            BLOCK_ELEMENTS,
2931            GEOMETRIC_SHAPES,
2932            MISCELLANEOUS_SYMBOLS,
2933            DINGBATS,
2934            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2935            SUPPLEMENTAL_ARROWS_A,
2936            BRAILLE_PATTERNS,
2937            SUPPLEMENTAL_ARROWS_B,
2938            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2939            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2940            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2941            GLAGOLITIC,
2942            LATIN_EXTENDED_C,
2943            COPTIC,
2944            GEORGIAN_SUPPLEMENT,
2945            TIFINAGH,
2946            ETHIOPIC_EXTENDED,
2947            CYRILLIC_EXTENDED_A,
2948            SUPPLEMENTAL_PUNCTUATION,
2949            CJK_RADICALS_SUPPLEMENT,
2950            KANGXI_RADICALS,
2951            null,
2952            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2953            CJK_SYMBOLS_AND_PUNCTUATION,
2954            HIRAGANA,
2955            KATAKANA,
2956            BOPOMOFO,
2957            HANGUL_COMPATIBILITY_JAMO,
2958            KANBUN,
2959            BOPOMOFO_EXTENDED,
2960            CJK_STROKES,
2961            KATAKANA_PHONETIC_EXTENSIONS,
2962            ENCLOSED_CJK_LETTERS_AND_MONTHS,
2963            CJK_COMPATIBILITY,
2964            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2965            YIJING_HEXAGRAM_SYMBOLS,
2966            CJK_UNIFIED_IDEOGRAPHS,
2967            YI_SYLLABLES,
2968            YI_RADICALS,
2969            LISU,
2970            VAI,
2971            CYRILLIC_EXTENDED_B,
2972            BAMUM,
2973            MODIFIER_TONE_LETTERS,
2974            LATIN_EXTENDED_D,
2975            SYLOTI_NAGRI,
2976            COMMON_INDIC_NUMBER_FORMS,
2977            PHAGS_PA,
2978            SAURASHTRA,
2979            DEVANAGARI_EXTENDED,
2980            KAYAH_LI,
2981            REJANG,
2982            HANGUL_JAMO_EXTENDED_A,
2983            JAVANESE,
2984            null,
2985            CHAM,
2986            MYANMAR_EXTENDED_A,
2987            TAI_VIET,
2988            MEETEI_MAYEK_EXTENSIONS,
2989            ETHIOPIC_EXTENDED_A,
2990            null,
2991            MEETEI_MAYEK,
2992            HANGUL_SYLLABLES,
2993            HANGUL_JAMO_EXTENDED_B,
2994            HIGH_SURROGATES,
2995            HIGH_PRIVATE_USE_SURROGATES,
2996            LOW_SURROGATES,
2997            PRIVATE_USE_AREA,
2998            CJK_COMPATIBILITY_IDEOGRAPHS,
2999            ALPHABETIC_PRESENTATION_FORMS,
3000            ARABIC_PRESENTATION_FORMS_A,
3001            VARIATION_SELECTORS,
3002            VERTICAL_FORMS,
3003            COMBINING_HALF_MARKS,
3004            CJK_COMPATIBILITY_FORMS,
3005            SMALL_FORM_VARIANTS,
3006            ARABIC_PRESENTATION_FORMS_B,
3007            HALFWIDTH_AND_FULLWIDTH_FORMS,
3008            SPECIALS,
3009            LINEAR_B_SYLLABARY,
3010            LINEAR_B_IDEOGRAMS,
3011            AEGEAN_NUMBERS,
3012            ANCIENT_GREEK_NUMBERS,
3013            ANCIENT_SYMBOLS,
3014            PHAISTOS_DISC,
3015            null,
3016            LYCIAN,
3017            CARIAN,
3018            null,
3019            OLD_ITALIC,
3020            GOTHIC,
3021            null,
3022            UGARITIC,
3023            OLD_PERSIAN,
3024            null,
3025            DESERET,
3026            SHAVIAN,
3027            OSMANYA,
3028            null,
3029            CYPRIOT_SYLLABARY,
3030            IMPERIAL_ARAMAIC,
3031            null,
3032            PHOENICIAN,
3033            LYDIAN,
3034            null,
3035            MEROITIC_HIEROGLYPHS,
3036            MEROITIC_CURSIVE,
3037            KHAROSHTHI,
3038            OLD_SOUTH_ARABIAN,
3039            null,
3040            AVESTAN,
3041            INSCRIPTIONAL_PARTHIAN,
3042            INSCRIPTIONAL_PAHLAVI,
3043            null,
3044            OLD_TURKIC,
3045            null,
3046            RUMI_NUMERAL_SYMBOLS,
3047            null,
3048            BRAHMI,
3049            KAITHI,
3050            SORA_SOMPENG,
3051            CHAKMA,
3052            null,
3053            SHARADA,
3054            null,
3055            TAKRI,
3056            null,
3057            CUNEIFORM,
3058            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3059            null,
3060            EGYPTIAN_HIEROGLYPHS,
3061            null,
3062            BAMUM_SUPPLEMENT,
3063            null,
3064            MIAO,
3065            null,
3066            KANA_SUPPLEMENT,
3067            null,
3068            BYZANTINE_MUSICAL_SYMBOLS,
3069            MUSICAL_SYMBOLS,
3070            ANCIENT_GREEK_MUSICAL_NOTATION,
3071            null,
3072            TAI_XUAN_JING_SYMBOLS,
3073            COUNTING_ROD_NUMERALS,
3074            null,
3075            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3076            null,
3077            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3078            null,
3079            MAHJONG_TILES,
3080            DOMINO_TILES,
3081            PLAYING_CARDS,
3082            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3083            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3084            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3085            EMOTICONS,
3086            null,
3087            TRANSPORT_AND_MAP_SYMBOLS,
3088            ALCHEMICAL_SYMBOLS,
3089            null,
3090            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3091            null,
3092            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3093            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3094            null,
3095            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3096            null,
3097            TAGS,
3098            null,
3099            VARIATION_SELECTORS_SUPPLEMENT,
3100            null,
3101            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3102            SUPPLEMENTARY_PRIVATE_USE_AREA_B
3103        };
3104
3105
3106        /**
3107         * Returns the object representing the Unicode block containing the
3108         * given character, or {@code null} if the character is not a
3109         * member of a defined block.
3110         *
3111         * <p><b>Note:</b> This method cannot handle
3112         * <a href="Character.html#supplementary"> supplementary
3113         * characters</a>.  To support all Unicode characters, including
3114         * supplementary characters, use the {@link #of(int)} method.
3115         *
3116         * @param   c  The character in question
3117         * @return  The {@code UnicodeBlock} instance representing the
3118         *          Unicode block of which this character is a member, or
3119         *          {@code null} if the character is not a member of any
3120         *          Unicode block
3121         */
3122        public static UnicodeBlock of(char c) {
3123            return of((int)c);
3124        }
3125
3126        /**
3127         * Returns the object representing the Unicode block
3128         * containing the given character (Unicode code point), or
3129         * {@code null} if the character is not a member of a
3130         * defined block.
3131         *
3132         * @param   codePoint the character (Unicode code point) in question.
3133         * @return  The {@code UnicodeBlock} instance representing the
3134         *          Unicode block of which this character is a member, or
3135         *          {@code null} if the character is not a member of any
3136         *          Unicode block
3137         * @exception IllegalArgumentException if the specified
3138         * {@code codePoint} is an invalid Unicode code point.
3139         * @see Character#isValidCodePoint(int)
3140         * @since   1.5
3141         */
3142        public static UnicodeBlock of(int codePoint) {
3143            if (!isValidCodePoint(codePoint)) {
3144                throw new IllegalArgumentException();
3145            }
3146
3147            int top, bottom, current;
3148            bottom = 0;
3149            top = blockStarts.length;
3150            current = top/2;
3151
3152            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3153            while (top - bottom > 1) {
3154                if (codePoint >= blockStarts[current]) {
3155                    bottom = current;
3156                } else {
3157                    top = current;
3158                }
3159                current = (top + bottom) / 2;
3160            }
3161            return blocks[current];
3162        }
3163
3164        /**
3165         * Returns the UnicodeBlock with the given name. Block
3166         * names are determined by The Unicode Standard. The file
3167         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3168         * version of the standard. The {@link Character} class specifies
3169         * the version of the standard that it supports.
3170         * <p>
3171         * This method accepts block names in the following forms:
3172         * <ol>
3173         * <li> Canonical block names as defined by the Unicode Standard.
3174         * For example, the standard defines a "Basic Latin" block. Therefore, this
3175         * method accepts "Basic Latin" as a valid block name. The documentation of
3176         * each UnicodeBlock provides the canonical name.
3177         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3178         * is a valid block name for the "Basic Latin" block.
3179         * <li>The text representation of each constant UnicodeBlock identifier.
3180         * For example, this method will return the {@link #BASIC_LATIN} block if
3181         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3182         * hyphens in the canonical name with underscores.
3183         * </ol>
3184         * Finally, character case is ignored for all of the valid block name forms.
3185         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3186         * The en_US locale's case mapping rules are used to provide case-insensitive
3187         * string comparisons for block name validation.
3188         * <p>
3189         * If the Unicode Standard changes block names, both the previous and
3190         * current names will be accepted.
3191         *
3192         * @param blockName A {@code UnicodeBlock} name.
3193         * @return The {@code UnicodeBlock} instance identified
3194         *         by {@code blockName}
3195         * @throws IllegalArgumentException if {@code blockName} is an
3196         *         invalid name
3197         * @throws NullPointerException if {@code blockName} is null
3198         * @since 1.5
3199         */
3200        public static final UnicodeBlock forName(String blockName) {
3201            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3202            if (block == null) {
3203                throw new IllegalArgumentException();
3204            }
3205            return block;
3206        }
3207    }
3208
3209
3210    /**
3211     * A family of character subsets representing the character scripts
3212     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3213     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3214     * character is assigned to a single Unicode script, either a specific
3215     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3216     * one of the following three special values,
3217     * {@link Character.UnicodeScript#INHERITED Inherited},
3218     * {@link Character.UnicodeScript#COMMON Common} or
3219     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3220     *
3221     * @since 1.7
3222     */
3223    public static enum UnicodeScript {
3224        /**
3225         * Unicode script "Common".
3226         */
3227        COMMON,
3228
3229        /**
3230         * Unicode script "Latin".
3231         */
3232        LATIN,
3233
3234        /**
3235         * Unicode script "Greek".
3236         */
3237        GREEK,
3238
3239        /**
3240         * Unicode script "Cyrillic".
3241         */
3242        CYRILLIC,
3243
3244        /**
3245         * Unicode script "Armenian".
3246         */
3247        ARMENIAN,
3248
3249        /**
3250         * Unicode script "Hebrew".
3251         */
3252        HEBREW,
3253
3254        /**
3255         * Unicode script "Arabic".
3256         */
3257        ARABIC,
3258
3259        /**
3260         * Unicode script "Syriac".
3261         */
3262        SYRIAC,
3263
3264        /**
3265         * Unicode script "Thaana".
3266         */
3267        THAANA,
3268
3269        /**
3270         * Unicode script "Devanagari".
3271         */
3272        DEVANAGARI,
3273
3274        /**
3275         * Unicode script "Bengali".
3276         */
3277        BENGALI,
3278
3279        /**
3280         * Unicode script "Gurmukhi".
3281         */
3282        GURMUKHI,
3283
3284        /**
3285         * Unicode script "Gujarati".
3286         */
3287        GUJARATI,
3288
3289        /**
3290         * Unicode script "Oriya".
3291         */
3292        ORIYA,
3293
3294        /**
3295         * Unicode script "Tamil".
3296         */
3297        TAMIL,
3298
3299        /**
3300         * Unicode script "Telugu".
3301         */
3302        TELUGU,
3303
3304        /**
3305         * Unicode script "Kannada".
3306         */
3307        KANNADA,
3308
3309        /**
3310         * Unicode script "Malayalam".
3311         */
3312        MALAYALAM,
3313
3314        /**
3315         * Unicode script "Sinhala".
3316         */
3317        SINHALA,
3318
3319        /**
3320         * Unicode script "Thai".
3321         */
3322        THAI,
3323
3324        /**
3325         * Unicode script "Lao".
3326         */
3327        LAO,
3328
3329        /**
3330         * Unicode script "Tibetan".
3331         */
3332        TIBETAN,
3333
3334        /**
3335         * Unicode script "Myanmar".
3336         */
3337        MYANMAR,
3338
3339        /**
3340         * Unicode script "Georgian".
3341         */
3342        GEORGIAN,
3343
3344        /**
3345         * Unicode script "Hangul".
3346         */
3347        HANGUL,
3348
3349        /**
3350         * Unicode script "Ethiopic".
3351         */
3352        ETHIOPIC,
3353
3354        /**
3355         * Unicode script "Cherokee".
3356         */
3357        CHEROKEE,
3358
3359        /**
3360         * Unicode script "Canadian_Aboriginal".
3361         */
3362        CANADIAN_ABORIGINAL,
3363
3364        /**
3365         * Unicode script "Ogham".
3366         */
3367        OGHAM,
3368
3369        /**
3370         * Unicode script "Runic".
3371         */
3372        RUNIC,
3373
3374        /**
3375         * Unicode script "Khmer".
3376         */
3377        KHMER,
3378
3379        /**
3380         * Unicode script "Mongolian".
3381         */
3382        MONGOLIAN,
3383
3384        /**
3385         * Unicode script "Hiragana".
3386         */
3387        HIRAGANA,
3388
3389        /**
3390         * Unicode script "Katakana".
3391         */
3392        KATAKANA,
3393
3394        /**
3395         * Unicode script "Bopomofo".
3396         */
3397        BOPOMOFO,
3398
3399        /**
3400         * Unicode script "Han".
3401         */
3402        HAN,
3403
3404        /**
3405         * Unicode script "Yi".
3406         */
3407        YI,
3408
3409        /**
3410         * Unicode script "Old_Italic".
3411         */
3412        OLD_ITALIC,
3413
3414        /**
3415         * Unicode script "Gothic".
3416         */
3417        GOTHIC,
3418
3419        /**
3420         * Unicode script "Deseret".
3421         */
3422        DESERET,
3423
3424        /**
3425         * Unicode script "Inherited".
3426         */
3427        INHERITED,
3428
3429        /**
3430         * Unicode script "Tagalog".
3431         */
3432        TAGALOG,
3433
3434        /**
3435         * Unicode script "Hanunoo".
3436         */
3437        HANUNOO,
3438
3439        /**
3440         * Unicode script "Buhid".
3441         */
3442        BUHID,
3443
3444        /**
3445         * Unicode script "Tagbanwa".
3446         */
3447        TAGBANWA,
3448
3449        /**
3450         * Unicode script "Limbu".
3451         */
3452        LIMBU,
3453
3454        /**
3455         * Unicode script "Tai_Le".
3456         */
3457        TAI_LE,
3458
3459        /**
3460         * Unicode script "Linear_B".
3461         */
3462        LINEAR_B,
3463
3464        /**
3465         * Unicode script "Ugaritic".
3466         */
3467        UGARITIC,
3468
3469        /**
3470         * Unicode script "Shavian".
3471         */
3472        SHAVIAN,
3473
3474        /**
3475         * Unicode script "Osmanya".
3476         */
3477        OSMANYA,
3478
3479        /**
3480         * Unicode script "Cypriot".
3481         */
3482        CYPRIOT,
3483
3484        /**
3485         * Unicode script "Braille".
3486         */
3487        BRAILLE,
3488
3489        /**
3490         * Unicode script "Buginese".
3491         */
3492        BUGINESE,
3493
3494        /**
3495         * Unicode script "Coptic".
3496         */
3497        COPTIC,
3498
3499        /**
3500         * Unicode script "New_Tai_Lue".
3501         */
3502        NEW_TAI_LUE,
3503
3504        /**
3505         * Unicode script "Glagolitic".
3506         */
3507        GLAGOLITIC,
3508
3509        /**
3510         * Unicode script "Tifinagh".
3511         */
3512        TIFINAGH,
3513
3514        /**
3515         * Unicode script "Syloti_Nagri".
3516         */
3517        SYLOTI_NAGRI,
3518
3519        /**
3520         * Unicode script "Old_Persian".
3521         */
3522        OLD_PERSIAN,
3523
3524        /**
3525         * Unicode script "Kharoshthi".
3526         */
3527        KHAROSHTHI,
3528
3529        /**
3530         * Unicode script "Balinese".
3531         */
3532        BALINESE,
3533
3534        /**
3535         * Unicode script "Cuneiform".
3536         */
3537        CUNEIFORM,
3538
3539        /**
3540         * Unicode script "Phoenician".
3541         */
3542        PHOENICIAN,
3543
3544        /**
3545         * Unicode script "Phags_Pa".
3546         */
3547        PHAGS_PA,
3548
3549        /**
3550         * Unicode script "Nko".
3551         */
3552        NKO,
3553
3554        /**
3555         * Unicode script "Sundanese".
3556         */
3557        SUNDANESE,
3558
3559        /**
3560         * Unicode script "Batak".
3561         */
3562        BATAK,
3563
3564        /**
3565         * Unicode script "Lepcha".
3566         */
3567        LEPCHA,
3568
3569        /**
3570         * Unicode script "Ol_Chiki".
3571         */
3572        OL_CHIKI,
3573
3574        /**
3575         * Unicode script "Vai".
3576         */
3577        VAI,
3578
3579        /**
3580         * Unicode script "Saurashtra".
3581         */
3582        SAURASHTRA,
3583
3584        /**
3585         * Unicode script "Kayah_Li".
3586         */
3587        KAYAH_LI,
3588
3589        /**
3590         * Unicode script "Rejang".
3591         */
3592        REJANG,
3593
3594        /**
3595         * Unicode script "Lycian".
3596         */
3597        LYCIAN,
3598
3599        /**
3600         * Unicode script "Carian".
3601         */
3602        CARIAN,
3603
3604        /**
3605         * Unicode script "Lydian".
3606         */
3607        LYDIAN,
3608
3609        /**
3610         * Unicode script "Cham".
3611         */
3612        CHAM,
3613
3614        /**
3615         * Unicode script "Tai_Tham".
3616         */
3617        TAI_THAM,
3618
3619        /**
3620         * Unicode script "Tai_Viet".
3621         */
3622        TAI_VIET,
3623
3624        /**
3625         * Unicode script "Avestan".
3626         */
3627        AVESTAN,
3628
3629        /**
3630         * Unicode script "Egyptian_Hieroglyphs".
3631         */
3632        EGYPTIAN_HIEROGLYPHS,
3633
3634        /**
3635         * Unicode script "Samaritan".
3636         */
3637        SAMARITAN,
3638
3639        /**
3640         * Unicode script "Mandaic".
3641         */
3642        MANDAIC,
3643
3644        /**
3645         * Unicode script "Lisu".
3646         */
3647        LISU,
3648
3649        /**
3650         * Unicode script "Bamum".
3651         */
3652        BAMUM,
3653
3654        /**
3655         * Unicode script "Javanese".
3656         */
3657        JAVANESE,
3658
3659        /**
3660         * Unicode script "Meetei_Mayek".
3661         */
3662        MEETEI_MAYEK,
3663
3664        /**
3665         * Unicode script "Imperial_Aramaic".
3666         */
3667        IMPERIAL_ARAMAIC,
3668
3669        /**
3670         * Unicode script "Old_South_Arabian".
3671         */
3672        OLD_SOUTH_ARABIAN,
3673
3674        /**
3675         * Unicode script "Inscriptional_Parthian".
3676         */
3677        INSCRIPTIONAL_PARTHIAN,
3678
3679        /**
3680         * Unicode script "Inscriptional_Pahlavi".
3681         */
3682        INSCRIPTIONAL_PAHLAVI,
3683
3684        /**
3685         * Unicode script "Old_Turkic".
3686         */
3687        OLD_TURKIC,
3688
3689        /**
3690         * Unicode script "Brahmi".
3691         */
3692        BRAHMI,
3693
3694        /**
3695         * Unicode script "Kaithi".
3696         */
3697        KAITHI,
3698
3699        /**
         * Unicode script "Meroitic_Hieroglyphs".
3701         */
3702        MEROITIC_HIEROGLYPHS,
3703
3704        /**
         * Unicode script "Meroitic_Cursive".
3706         */
3707        MEROITIC_CURSIVE,
3708
3709        /**
         * Unicode script "Sora_Sompeng".
3711         */
3712        SORA_SOMPENG,
3713
3714        /**
3715         * Unicode script "Chakma".
3716         */
3717        CHAKMA,
3718
3719        /**
3720         * Unicode script "Sharada".
3721         */
3722        SHARADA,
3723
3724        /**
3725         * Unicode script "Takri".
3726         */
3727        TAKRI,
3728
3729        /**
3730         * Unicode script "Miao".
3731         */
3732        MIAO,
3733
3734        /**
3735         * Unicode script "Unknown".
3736         */
3737        UNKNOWN;
3738
3739        private static final int[] scriptStarts = {
3740            0x0000,   // 0000..0040; COMMON
3741            0x0041,   // 0041..005A; LATIN
3742            0x005B,   // 005B..0060; COMMON
3743            0x0061,   // 0061..007A; LATIN
3744            0x007B,   // 007B..00A9; COMMON
3745            0x00AA,   // 00AA..00AA; LATIN
3746            0x00AB,   // 00AB..00B9; COMMON
3747            0x00BA,   // 00BA..00BA; LATIN
3748            0x00BB,   // 00BB..00BF; COMMON
3749            0x00C0,   // 00C0..00D6; LATIN
3750            0x00D7,   // 00D7..00D7; COMMON
3751            0x00D8,   // 00D8..00F6; LATIN
3752            0x00F7,   // 00F7..00F7; COMMON
3753            0x00F8,   // 00F8..02B8; LATIN
3754            0x02B9,   // 02B9..02DF; COMMON
3755            0x02E0,   // 02E0..02E4; LATIN
3756            0x02E5,   // 02E5..02E9; COMMON
3757            0x02EA,   // 02EA..02EB; BOPOMOFO
3758            0x02EC,   // 02EC..02FF; COMMON
3759            0x0300,   // 0300..036F; INHERITED
3760            0x0370,   // 0370..0373; GREEK
3761            0x0374,   // 0374..0374; COMMON
3762            0x0375,   // 0375..037D; GREEK
3763            0x037E,   // 037E..0383; COMMON
3764            0x0384,   // 0384..0384; GREEK
3765            0x0385,   // 0385..0385; COMMON
3766            0x0386,   // 0386..0386; GREEK
3767            0x0387,   // 0387..0387; COMMON
3768            0x0388,   // 0388..03E1; GREEK
3769            0x03E2,   // 03E2..03EF; COPTIC
3770            0x03F0,   // 03F0..03FF; GREEK
3771            0x0400,   // 0400..0484; CYRILLIC
3772            0x0485,   // 0485..0486; INHERITED
3773            0x0487,   // 0487..0530; CYRILLIC
3774            0x0531,   // 0531..0588; ARMENIAN
3775            0x0589,   // 0589..0589; COMMON
3776            0x058A,   // 058A..0590; ARMENIAN
3777            0x0591,   // 0591..05FF; HEBREW
3778            0x0600,   // 0600..060B; ARABIC
3779            0x060C,   // 060C..060C; COMMON
3780            0x060D,   // 060D..061A; ARABIC
3781            0x061B,   // 061B..061D; COMMON
3782            0x061E,   // 061E..061E; ARABIC
3783            0x061F,   // 061F..061F; COMMON
3784            0x0620,   // 0620..063F; ARABIC
3785            0x0640,   // 0640..0640; COMMON
3786            0x0641,   // 0641..064A; ARABIC
3787            0x064B,   // 064B..0655; INHERITED
3788            0x0656,   // 0656..065F; ARABIC
3789            0x0660,   // 0660..0669; COMMON
3790            0x066A,   // 066A..066F; ARABIC
3791            0x0670,   // 0670..0670; INHERITED
3792            0x0671,   // 0671..06DC; ARABIC
3793            0x06DD,   // 06DD..06DD; COMMON
3794            0x06DE,   // 06DE..06FF; ARABIC
3795            0x0700,   // 0700..074F; SYRIAC
3796            0x0750,   // 0750..077F; ARABIC
3797            0x0780,   // 0780..07BF; THAANA
3798            0x07C0,   // 07C0..07FF; NKO
3799            0x0800,   // 0800..083F; SAMARITAN
3800            0x0840,   // 0840..089F; MANDAIC
3801            0x08A0,   // 08A0..08FF; ARABIC
3802            0x0900,   // 0900..0950; DEVANAGARI
3803            0x0951,   // 0951..0952; INHERITED
3804            0x0953,   // 0953..0963; DEVANAGARI
3805            0x0964,   // 0964..0965; COMMON
3806            0x0966,   // 0966..0980; DEVANAGARI
3807            0x0981,   // 0981..0A00; BENGALI
3808            0x0A01,   // 0A01..0A80; GURMUKHI
3809            0x0A81,   // 0A81..0B00; GUJARATI
3810            0x0B01,   // 0B01..0B81; ORIYA
3811            0x0B82,   // 0B82..0C00; TAMIL
3812            0x0C01,   // 0C01..0C81; TELUGU
3813            0x0C82,   // 0C82..0CF0; KANNADA
3814            0x0D02,   // 0D02..0D81; MALAYALAM
3815            0x0D82,   // 0D82..0E00; SINHALA
3816            0x0E01,   // 0E01..0E3E; THAI
3817            0x0E3F,   // 0E3F..0E3F; COMMON
3818            0x0E40,   // 0E40..0E80; THAI
3819            0x0E81,   // 0E81..0EFF; LAO
3820            0x0F00,   // 0F00..0FD4; TIBETAN
3821            0x0FD5,   // 0FD5..0FD8; COMMON
3822            0x0FD9,   // 0FD9..0FFF; TIBETAN
3823            0x1000,   // 1000..109F; MYANMAR
3824            0x10A0,   // 10A0..10FA; GEORGIAN
3825            0x10FB,   // 10FB..10FB; COMMON
3826            0x10FC,   // 10FC..10FF; GEORGIAN
3827            0x1100,   // 1100..11FF; HANGUL
3828            0x1200,   // 1200..139F; ETHIOPIC
3829            0x13A0,   // 13A0..13FF; CHEROKEE
3830            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3831            0x1680,   // 1680..169F; OGHAM
3832            0x16A0,   // 16A0..16EA; RUNIC
3833            0x16EB,   // 16EB..16ED; COMMON
3834            0x16EE,   // 16EE..16FF; RUNIC
3835            0x1700,   // 1700..171F; TAGALOG
3836            0x1720,   // 1720..1734; HANUNOO
3837            0x1735,   // 1735..173F; COMMON
3838            0x1740,   // 1740..175F; BUHID
3839            0x1760,   // 1760..177F; TAGBANWA
3840            0x1780,   // 1780..17FF; KHMER
3841            0x1800,   // 1800..1801; MONGOLIAN
3842            0x1802,   // 1802..1803; COMMON
3843            0x1804,   // 1804..1804; MONGOLIAN
3844            0x1805,   // 1805..1805; COMMON
3845            0x1806,   // 1806..18AF; MONGOLIAN
3846            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3847            0x1900,   // 1900..194F; LIMBU
3848            0x1950,   // 1950..197F; TAI_LE
3849            0x1980,   // 1980..19DF; NEW_TAI_LUE
3850            0x19E0,   // 19E0..19FF; KHMER
3851            0x1A00,   // 1A00..1A1F; BUGINESE
3852            0x1A20,   // 1A20..1AFF; TAI_THAM
3853            0x1B00,   // 1B00..1B7F; BALINESE
3854            0x1B80,   // 1B80..1BBF; SUNDANESE
3855            0x1BC0,   // 1BC0..1BFF; BATAK
3856            0x1C00,   // 1C00..1C4F; LEPCHA
3857            0x1C50,   // 1C50..1CBF; OL_CHIKI
3858            0x1CC0,   // 1CC0..1CCF; SUNDANESE
3859            0x1CD0,   // 1CD0..1CD2; INHERITED
3860            0x1CD3,   // 1CD3..1CD3; COMMON
3861            0x1CD4,   // 1CD4..1CE0; INHERITED
3862            0x1CE1,   // 1CE1..1CE1; COMMON
3863            0x1CE2,   // 1CE2..1CE8; INHERITED
3864            0x1CE9,   // 1CE9..1CEC; COMMON
3865            0x1CED,   // 1CED..1CED; INHERITED
3866            0x1CEE,   // 1CEE..1CF3; COMMON
3867            0x1CF4,   // 1CF4..1CF4; INHERITED
3868            0x1CF5,   // 1CF5..1CFF; COMMON
3869            0x1D00,   // 1D00..1D25; LATIN
3870            0x1D26,   // 1D26..1D2A; GREEK
3871            0x1D2B,   // 1D2B..1D2B; CYRILLIC
3872            0x1D2C,   // 1D2C..1D5C; LATIN
3873            0x1D5D,   // 1D5D..1D61; GREEK
3874            0x1D62,   // 1D62..1D65; LATIN
3875            0x1D66,   // 1D66..1D6A; GREEK
3876            0x1D6B,   // 1D6B..1D77; LATIN
3877            0x1D78,   // 1D78..1D78; CYRILLIC
3878            0x1D79,   // 1D79..1DBE; LATIN
3879            0x1DBF,   // 1DBF..1DBF; GREEK
3880            0x1DC0,   // 1DC0..1DFF; INHERITED
3881            0x1E00,   // 1E00..1EFF; LATIN
3882            0x1F00,   // 1F00..1FFF; GREEK
3883            0x2000,   // 2000..200B; COMMON
3884            0x200C,   // 200C..200D; INHERITED
3885            0x200E,   // 200E..2070; COMMON
3886            0x2071,   // 2071..2073; LATIN
3887            0x2074,   // 2074..207E; COMMON
3888            0x207F,   // 207F..207F; LATIN
3889            0x2080,   // 2080..208F; COMMON
3890            0x2090,   // 2090..209F; LATIN
3891            0x20A0,   // 20A0..20CF; COMMON
3892            0x20D0,   // 20D0..20FF; INHERITED
3893            0x2100,   // 2100..2125; COMMON
3894            0x2126,   // 2126..2126; GREEK
3895            0x2127,   // 2127..2129; COMMON
3896            0x212A,   // 212A..212B; LATIN
3897            0x212C,   // 212C..2131; COMMON
3898            0x2132,   // 2132..2132; LATIN
3899            0x2133,   // 2133..214D; COMMON
3900            0x214E,   // 214E..214E; LATIN
3901            0x214F,   // 214F..215F; COMMON
3902            0x2160,   // 2160..2188; LATIN
3903            0x2189,   // 2189..27FF; COMMON
3904            0x2800,   // 2800..28FF; BRAILLE
3905            0x2900,   // 2900..2BFF; COMMON
3906            0x2C00,   // 2C00..2C5F; GLAGOLITIC
3907            0x2C60,   // 2C60..2C7F; LATIN
3908            0x2C80,   // 2C80..2CFF; COPTIC
3909            0x2D00,   // 2D00..2D2F; GEORGIAN
3910            0x2D30,   // 2D30..2D7F; TIFINAGH
3911            0x2D80,   // 2D80..2DDF; ETHIOPIC
3912            0x2DE0,   // 2DE0..2DFF; CYRILLIC
3913            0x2E00,   // 2E00..2E7F; COMMON
3914            0x2E80,   // 2E80..2FEF; HAN
3915            0x2FF0,   // 2FF0..3004; COMMON
3916            0x3005,   // 3005..3005; HAN
3917            0x3006,   // 3006..3006; COMMON
3918            0x3007,   // 3007..3007; HAN
3919            0x3008,   // 3008..3020; COMMON
3920            0x3021,   // 3021..3029; HAN
3921            0x302A,   // 302A..302D; INHERITED
3922            0x302E,   // 302E..302F; HANGUL
3923            0x3030,   // 3030..3037; COMMON
3924            0x3038,   // 3038..303B; HAN
3925            0x303C,   // 303C..3040; COMMON
3926            0x3041,   // 3041..3098; HIRAGANA
3927            0x3099,   // 3099..309A; INHERITED
3928            0x309B,   // 309B..309C; COMMON
3929            0x309D,   // 309D..309F; HIRAGANA
3930            0x30A0,   // 30A0..30A0; COMMON
3931            0x30A1,   // 30A1..30FA; KATAKANA
3932            0x30FB,   // 30FB..30FC; COMMON
3933            0x30FD,   // 30FD..3104; KATAKANA
3934            0x3105,   // 3105..3130; BOPOMOFO
3935            0x3131,   // 3131..318F; HANGUL
3936            0x3190,   // 3190..319F; COMMON
3937            0x31A0,   // 31A0..31BF; BOPOMOFO
3938            0x31C0,   // 31C0..31EF; COMMON
3939            0x31F0,   // 31F0..31FF; KATAKANA
3940            0x3200,   // 3200..321F; HANGUL
3941            0x3220,   // 3220..325F; COMMON
3942            0x3260,   // 3260..327E; HANGUL
3943            0x327F,   // 327F..32CF; COMMON
3944            0x32D0,   // 32D0..3357; KATAKANA
3945            0x3358,   // 3358..33FF; COMMON
3946            0x3400,   // 3400..4DBF; HAN
3947            0x4DC0,   // 4DC0..4DFF; COMMON
3948            0x4E00,   // 4E00..9FFF; HAN
3949            0xA000,   // A000..A4CF; YI
3950            0xA4D0,   // A4D0..A4FF; LISU
3951            0xA500,   // A500..A63F; VAI
3952            0xA640,   // A640..A69F; CYRILLIC
3953            0xA6A0,   // A6A0..A6FF; BAMUM
3954            0xA700,   // A700..A721; COMMON
3955            0xA722,   // A722..A787; LATIN
3956            0xA788,   // A788..A78A; COMMON
3957            0xA78B,   // A78B..A7FF; LATIN
3958            0xA800,   // A800..A82F; SYLOTI_NAGRI
3959            0xA830,   // A830..A83F; COMMON
3960            0xA840,   // A840..A87F; PHAGS_PA
3961            0xA880,   // A880..A8DF; SAURASHTRA
3962            0xA8E0,   // A8E0..A8FF; DEVANAGARI
3963            0xA900,   // A900..A92F; KAYAH_LI
3964            0xA930,   // A930..A95F; REJANG
3965            0xA960,   // A960..A97F; HANGUL
3966            0xA980,   // A980..A9FF; JAVANESE
3967            0xAA00,   // AA00..AA5F; CHAM
3968            0xAA60,   // AA60..AA7F; MYANMAR
3969            0xAA80,   // AA80..AADF; TAI_VIET
3970            0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3971            0xAB01,   // AB01..ABBF; ETHIOPIC
3972            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3973            0xAC00,   // AC00..D7FB; HANGUL
3974            0xD7FC,   // D7FC..F8FF; UNKNOWN
3975            0xF900,   // F900..FAFF; HAN
3976            0xFB00,   // FB00..FB12; LATIN
3977            0xFB13,   // FB13..FB1C; ARMENIAN
3978            0xFB1D,   // FB1D..FB4F; HEBREW
3979            0xFB50,   // FB50..FD3D; ARABIC
3980            0xFD3E,   // FD3E..FD4F; COMMON
3981            0xFD50,   // FD50..FDFC; ARABIC
3982            0xFDFD,   // FDFD..FDFF; COMMON
3983            0xFE00,   // FE00..FE0F; INHERITED
3984            0xFE10,   // FE10..FE1F; COMMON
3985            0xFE20,   // FE20..FE2F; INHERITED
3986            0xFE30,   // FE30..FE6F; COMMON
3987            0xFE70,   // FE70..FEFE; ARABIC
3988            0xFEFF,   // FEFF..FF20; COMMON
3989            0xFF21,   // FF21..FF3A; LATIN
3990            0xFF3B,   // FF3B..FF40; COMMON
3991            0xFF41,   // FF41..FF5A; LATIN
3992            0xFF5B,   // FF5B..FF65; COMMON
3993            0xFF66,   // FF66..FF6F; KATAKANA
3994            0xFF70,   // FF70..FF70; COMMON
3995            0xFF71,   // FF71..FF9D; KATAKANA
3996            0xFF9E,   // FF9E..FF9F; COMMON
3997            0xFFA0,   // FFA0..FFDF; HANGUL
3998            0xFFE0,   // FFE0..FFFF; COMMON
3999            0x10000,  // 10000..100FF; LINEAR_B
4000            0x10100,  // 10100..1013F; COMMON
4001            0x10140,  // 10140..1018F; GREEK
4002            0x10190,  // 10190..101FC; COMMON
4003            0x101FD,  // 101FD..1027F; INHERITED
4004            0x10280,  // 10280..1029F; LYCIAN
4005            0x102A0,  // 102A0..102FF; CARIAN
4006            0x10300,  // 10300..1032F; OLD_ITALIC
4007            0x10330,  // 10330..1037F; GOTHIC
4008            0x10380,  // 10380..1039F; UGARITIC
4009            0x103A0,  // 103A0..103FF; OLD_PERSIAN
4010            0x10400,  // 10400..1044F; DESERET
4011            0x10450,  // 10450..1047F; SHAVIAN
4012            0x10480,  // 10480..107FF; OSMANYA
4013            0x10800,  // 10800..1083F; CYPRIOT
4014            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4015            0x10900,  // 10900..1091F; PHOENICIAN
4016            0x10920,  // 10920..1097F; LYDIAN
4017            0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4018            0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4019            0x10A00,  // 10A00..10A5F; KHAROSHTHI
4020            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4021            0x10B00,  // 10B00..10B3F; AVESTAN
4022            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4023            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4024            0x10C00,  // 10C00..10E5F; OLD_TURKIC
4025            0x10E60,  // 10E60..10FFF; ARABIC
4026            0x11000,  // 11000..1107F; BRAHMI
4027            0x11080,  // 11080..110CF; KAITHI
4028            0x110D0,  // 110D0..110FF; SORA_SOMPENG
4029            0x11100,  // 11100..1117F; CHAKMA
4030            0x11180,  // 11180..1167F; SHARADA
4031            0x11680,  // 11680..116CF; TAKRI
4032            0x12000,  // 12000..12FFF; CUNEIFORM
4033            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4034            0x16800,  // 16800..16A38; BAMUM
4035            0x16F00,  // 16F00..16F9F; MIAO
4036            0x1B000,  // 1B000..1B000; KATAKANA
4037            0x1B001,  // 1B001..1CFFF; HIRAGANA
4038            0x1D000,  // 1D000..1D166; COMMON
4039            0x1D167,  // 1D167..1D169; INHERITED
4040            0x1D16A,  // 1D16A..1D17A; COMMON
4041            0x1D17B,  // 1D17B..1D182; INHERITED
4042            0x1D183,  // 1D183..1D184; COMMON
4043            0x1D185,  // 1D185..1D18B; INHERITED
4044            0x1D18C,  // 1D18C..1D1A9; COMMON
4045            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4046            0x1D1AE,  // 1D1AE..1D1FF; COMMON
4047            0x1D200,  // 1D200..1D2FF; GREEK
4048            0x1D300,  // 1D300..1EDFF; COMMON
4049            0x1EE00,  // 1EE00..1EFFF; ARABIC
4050            0x1F000,  // 1F000..1F1FF; COMMON
4051            0x1F200,  // 1F200..1F200; HIRAGANA
            0x1F201,  // 1F201..1FFFF; COMMON
4053            0x20000,  // 20000..E0000; HAN
4054            0xE0001,  // E0001..E00FF; COMMON
4055            0xE0100,  // E0100..E01EF; INHERITED
4056            0xE01F0   // E01F0..10FFFF; UNKNOWN
4057
4058        };
4059
4060        private static final UnicodeScript[] scripts = {
4061            COMMON,
4062            LATIN,
4063            COMMON,
4064            LATIN,
4065            COMMON,
4066            LATIN,
4067            COMMON,
4068            LATIN,
4069            COMMON,
4070            LATIN,
4071            COMMON,
4072            LATIN,
4073            COMMON,
4074            LATIN,
4075            COMMON,
4076            LATIN,
4077            COMMON,
4078            BOPOMOFO,
4079            COMMON,
4080            INHERITED,
4081            GREEK,
4082            COMMON,
4083            GREEK,
4084            COMMON,
4085            GREEK,
4086            COMMON,
4087            GREEK,
4088            COMMON,
4089            GREEK,
4090            COPTIC,
4091            GREEK,
4092            CYRILLIC,
4093            INHERITED,
4094            CYRILLIC,
4095            ARMENIAN,
4096            COMMON,
4097            ARMENIAN,
4098            HEBREW,
4099            ARABIC,
4100            COMMON,
4101            ARABIC,
4102            COMMON,
4103            ARABIC,
4104            COMMON,
4105            ARABIC,
4106            COMMON,
4107            ARABIC,
4108            INHERITED,
4109            ARABIC,
4110            COMMON,
4111            ARABIC,
4112            INHERITED,
4113            ARABIC,
4114            COMMON,
4115            ARABIC,
4116            SYRIAC,
4117            ARABIC,
4118            THAANA,
4119            NKO,
4120            SAMARITAN,
4121            MANDAIC,
4122            ARABIC,
4123            DEVANAGARI,
4124            INHERITED,
4125            DEVANAGARI,
4126            COMMON,
4127            DEVANAGARI,
4128            BENGALI,
4129            GURMUKHI,
4130            GUJARATI,
4131            ORIYA,
4132            TAMIL,
4133            TELUGU,
4134            KANNADA,
4135            MALAYALAM,
4136            SINHALA,
4137            THAI,
4138            COMMON,
4139            THAI,
4140            LAO,
4141            TIBETAN,
4142            COMMON,
4143            TIBETAN,
4144            MYANMAR,
4145            GEORGIAN,
4146            COMMON,
4147            GEORGIAN,
4148            HANGUL,
4149            ETHIOPIC,
4150            CHEROKEE,
4151            CANADIAN_ABORIGINAL,
4152            OGHAM,
4153            RUNIC,
4154            COMMON,
4155            RUNIC,
4156            TAGALOG,
4157            HANUNOO,
4158            COMMON,
4159            BUHID,
4160            TAGBANWA,
4161            KHMER,
4162            MONGOLIAN,
4163            COMMON,
4164            MONGOLIAN,
4165            COMMON,
4166            MONGOLIAN,
4167            CANADIAN_ABORIGINAL,
4168            LIMBU,
4169            TAI_LE,
4170            NEW_TAI_LUE,
4171            KHMER,
4172            BUGINESE,
4173            TAI_THAM,
4174            BALINESE,
4175            SUNDANESE,
4176            BATAK,
4177            LEPCHA,
4178            OL_CHIKI,
4179            SUNDANESE,
4180            INHERITED,
4181            COMMON,
4182            INHERITED,
4183            COMMON,
4184            INHERITED,
4185            COMMON,
4186            INHERITED,
4187            COMMON,
4188            INHERITED,
4189            COMMON,
4190            LATIN,
4191            GREEK,
4192            CYRILLIC,
4193            LATIN,
4194            GREEK,
4195            LATIN,
4196            GREEK,
4197            LATIN,
4198            CYRILLIC,
4199            LATIN,
4200            GREEK,
4201            INHERITED,
4202            LATIN,
4203            GREEK,
4204            COMMON,
4205            INHERITED,
4206            COMMON,
4207            LATIN,
4208            COMMON,
4209            LATIN,
4210            COMMON,
4211            LATIN,
4212            COMMON,
4213            INHERITED,
4214            COMMON,
4215            GREEK,
4216            COMMON,
4217            LATIN,
4218            COMMON,
4219            LATIN,
4220            COMMON,
4221            LATIN,
4222            COMMON,
4223            LATIN,
4224            COMMON,
4225            BRAILLE,
4226            COMMON,
4227            GLAGOLITIC,
4228            LATIN,
4229            COPTIC,
4230            GEORGIAN,
4231            TIFINAGH,
4232            ETHIOPIC,
4233            CYRILLIC,
4234            COMMON,
4235            HAN,
4236            COMMON,
4237            HAN,
4238            COMMON,
4239            HAN,
4240            COMMON,
4241            HAN,
4242            INHERITED,
4243            HANGUL,
4244            COMMON,
4245            HAN,
4246            COMMON,
4247            HIRAGANA,
4248            INHERITED,
4249            COMMON,
4250            HIRAGANA,
4251            COMMON,
4252            KATAKANA,
4253            COMMON,
4254            KATAKANA,
4255            BOPOMOFO,
4256            HANGUL,
4257            COMMON,
4258            BOPOMOFO,
4259            COMMON,
4260            KATAKANA,
4261            HANGUL,
4262            COMMON,
4263            HANGUL,
4264            COMMON,
4265            KATAKANA,
4266            COMMON,
4267            HAN,
4268            COMMON,
4269            HAN,
4270            YI,
4271            LISU,
4272            VAI,
4273            CYRILLIC,
4274            BAMUM,
4275            COMMON,
4276            LATIN,
4277            COMMON,
4278            LATIN,
4279            SYLOTI_NAGRI,
4280            COMMON,
4281            PHAGS_PA,
4282            SAURASHTRA,
4283            DEVANAGARI,
4284            KAYAH_LI,
4285            REJANG,
4286            HANGUL,
4287            JAVANESE,
4288            CHAM,
4289            MYANMAR,
4290            TAI_VIET,
4291            MEETEI_MAYEK,
4292            ETHIOPIC,
4293            MEETEI_MAYEK,
4294            HANGUL,
4295            UNKNOWN     ,
4296            HAN,
4297            LATIN,
4298            ARMENIAN,
4299            HEBREW,
4300            ARABIC,
4301            COMMON,
4302            ARABIC,
4303            COMMON,
4304            INHERITED,
4305            COMMON,
4306            INHERITED,
4307            COMMON,
4308            ARABIC,
4309            COMMON,
4310            LATIN,
4311            COMMON,
4312            LATIN,
4313            COMMON,
4314            KATAKANA,
4315            COMMON,
4316            KATAKANA,
4317            COMMON,
4318            HANGUL,
4319            COMMON,
4320            LINEAR_B,
4321            COMMON,
4322            GREEK,
4323            COMMON,
4324            INHERITED,
4325            LYCIAN,
4326            CARIAN,
4327            OLD_ITALIC,
4328            GOTHIC,
4329            UGARITIC,
4330            OLD_PERSIAN,
4331            DESERET,
4332            SHAVIAN,
4333            OSMANYA,
4334            CYPRIOT,
4335            IMPERIAL_ARAMAIC,
4336            PHOENICIAN,
4337            LYDIAN,
4338            MEROITIC_HIEROGLYPHS,
4339            MEROITIC_CURSIVE,
4340            KHAROSHTHI,
4341            OLD_SOUTH_ARABIAN,
4342            AVESTAN,
4343            INSCRIPTIONAL_PARTHIAN,
4344            INSCRIPTIONAL_PAHLAVI,
4345            OLD_TURKIC,
4346            ARABIC,
4347            BRAHMI,
4348            KAITHI,
4349            SORA_SOMPENG,
4350            CHAKMA,
4351            SHARADA,
4352            TAKRI,
4353            CUNEIFORM,
4354            EGYPTIAN_HIEROGLYPHS,
4355            BAMUM,
4356            MIAO,
4357            KATAKANA,
4358            HIRAGANA,
4359            COMMON,
4360            INHERITED,
4361            COMMON,
4362            INHERITED,
4363            COMMON,
4364            INHERITED,
4365            COMMON,
4366            INHERITED,
4367            COMMON,
4368            GREEK,
4369            COMMON,
4370            ARABIC,
4371            COMMON,
4372            HIRAGANA,
4373            COMMON,
4374            HAN,
4375            COMMON,
4376            INHERITED,
4377            UNKNOWN
4378        };
4379
4380        private static HashMap<String, Character.UnicodeScript> aliases;
4381        static {
4382            aliases = new HashMap<>(128);
4383            aliases.put("ARAB", ARABIC);
4384            aliases.put("ARMI", IMPERIAL_ARAMAIC);
4385            aliases.put("ARMN", ARMENIAN);
4386            aliases.put("AVST", AVESTAN);
4387            aliases.put("BALI", BALINESE);
4388            aliases.put("BAMU", BAMUM);
4389            aliases.put("BATK", BATAK);
4390            aliases.put("BENG", BENGALI);
4391            aliases.put("BOPO", BOPOMOFO);
4392            aliases.put("BRAI", BRAILLE);
4393            aliases.put("BRAH", BRAHMI);
4394            aliases.put("BUGI", BUGINESE);
4395            aliases.put("BUHD", BUHID);
4396            aliases.put("CAKM", CHAKMA);
4397            aliases.put("CANS", CANADIAN_ABORIGINAL);
4398            aliases.put("CARI", CARIAN);
4399            aliases.put("CHAM", CHAM);
4400            aliases.put("CHER", CHEROKEE);
4401            aliases.put("COPT", COPTIC);
4402            aliases.put("CPRT", CYPRIOT);
4403            aliases.put("CYRL", CYRILLIC);
4404            aliases.put("DEVA", DEVANAGARI);
4405            aliases.put("DSRT", DESERET);
4406            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4407            aliases.put("ETHI", ETHIOPIC);
4408            aliases.put("GEOR", GEORGIAN);
4409            aliases.put("GLAG", GLAGOLITIC);
4410            aliases.put("GOTH", GOTHIC);
4411            aliases.put("GREK", GREEK);
4412            aliases.put("GUJR", GUJARATI);
4413            aliases.put("GURU", GURMUKHI);
4414            aliases.put("HANG", HANGUL);
4415            aliases.put("HANI", HAN);
4416            aliases.put("HANO", HANUNOO);
4417            aliases.put("HEBR", HEBREW);
4418            aliases.put("HIRA", HIRAGANA);
4419            // it appears we don't have the KATAKANA_OR_HIRAGANA
4420            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4421            aliases.put("ITAL", OLD_ITALIC);
4422            aliases.put("JAVA", JAVANESE);
4423            aliases.put("KALI", KAYAH_LI);
4424            aliases.put("KANA", KATAKANA);
4425            aliases.put("KHAR", KHAROSHTHI);
4426            aliases.put("KHMR", KHMER);
4427            aliases.put("KNDA", KANNADA);
4428            aliases.put("KTHI", KAITHI);
4429            aliases.put("LANA", TAI_THAM);
4430            aliases.put("LAOO", LAO);
4431            aliases.put("LATN", LATIN);
4432            aliases.put("LEPC", LEPCHA);
4433            aliases.put("LIMB", LIMBU);
4434            aliases.put("LINB", LINEAR_B);
4435            aliases.put("LISU", LISU);
4436            aliases.put("LYCI", LYCIAN);
4437            aliases.put("LYDI", LYDIAN);
4438            aliases.put("MAND", MANDAIC);
4439            aliases.put("MERC", MEROITIC_CURSIVE);
4440            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4441            aliases.put("MLYM", MALAYALAM);
4442            aliases.put("MONG", MONGOLIAN);
4443            aliases.put("MTEI", MEETEI_MAYEK);
4444            aliases.put("MYMR", MYANMAR);
4445            aliases.put("NKOO", NKO);
4446            aliases.put("OGAM", OGHAM);
4447            aliases.put("OLCK", OL_CHIKI);
4448            aliases.put("ORKH", OLD_TURKIC);
4449            aliases.put("ORYA", ORIYA);
4450            aliases.put("OSMA", OSMANYA);
4451            aliases.put("PHAG", PHAGS_PA);
4452            aliases.put("PLRD", MIAO);
4453            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4454            aliases.put("PHNX", PHOENICIAN);
4455            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4456            aliases.put("RJNG", REJANG);
4457            aliases.put("RUNR", RUNIC);
4458            aliases.put("SAMR", SAMARITAN);
4459            aliases.put("SARB", OLD_SOUTH_ARABIAN);
4460            aliases.put("SAUR", SAURASHTRA);
4461            aliases.put("SHAW", SHAVIAN);
4462            aliases.put("SHRD", SHARADA);
4463            aliases.put("SINH", SINHALA);
4464            aliases.put("SORA", SORA_SOMPENG);
4465            aliases.put("SUND", SUNDANESE);
4466            aliases.put("SYLO", SYLOTI_NAGRI);
4467            aliases.put("SYRC", SYRIAC);
4468            aliases.put("TAGB", TAGBANWA);
4469            aliases.put("TALE", TAI_LE);
4470            aliases.put("TAKR", TAKRI);
4471            aliases.put("TALU", NEW_TAI_LUE);
4472            aliases.put("TAML", TAMIL);
4473            aliases.put("TAVT", TAI_VIET);
4474            aliases.put("TELU", TELUGU);
4475            aliases.put("TFNG", TIFINAGH);
4476            aliases.put("TGLG", TAGALOG);
4477            aliases.put("THAA", THAANA);
4478            aliases.put("THAI", THAI);
4479            aliases.put("TIBT", TIBETAN);
4480            aliases.put("UGAR", UGARITIC);
4481            aliases.put("VAII", VAI);
4482            aliases.put("XPEO", OLD_PERSIAN);
4483            aliases.put("XSUX", CUNEIFORM);
4484            aliases.put("YIII", YI);
4485            aliases.put("ZINH", INHERITED);
4486            aliases.put("ZYYY", COMMON);
4487            aliases.put("ZZZZ", UNKNOWN);
4488        }
4489
4490        /**
4491         * Returns the enum constant representing the Unicode script of which
4492         * the given character (Unicode code point) is assigned to.
4493         *
4494         * @param   codePoint the character (Unicode code point) in question.
4495         * @return  The {@code UnicodeScript} constant representing the
4496         *          Unicode script of which this character is assigned to.
4497         *
4498         * @exception IllegalArgumentException if the specified
4499         * {@code codePoint} is an invalid Unicode code point.
4500         * @see Character#isValidCodePoint(int)
4501         *
4502         */
4503        public static UnicodeScript of(int codePoint) {
4504            if (!isValidCodePoint(codePoint))
4505                throw new IllegalArgumentException();
4506            int type = getType(codePoint);
4507            // leave SURROGATE and PRIVATE_USE for table lookup
4508            if (type == UNASSIGNED)
4509                return UNKNOWN;
4510            int index = Arrays.binarySearch(scriptStarts, codePoint);
4511            if (index < 0)
4512                index = -index - 2;
4513            return scripts[index];
4514        }
4515
4516        /**
4517         * Returns the UnicodeScript constant with the given Unicode script
4518         * name or the script name alias. Script names and their aliases are
4519         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4520         * and PropertyValueAliases&lt;version&gt;.txt define script names
4521         * and the script name aliases for a particular version of the
4522         * standard. The {@link Character} class specifies the version of
4523         * the standard that it supports.
4524         * <p>
4525         * Character case is ignored for all of the valid script names.
4526         * The en_US locale's case mapping rules are used to provide
4527         * case-insensitive string comparisons for script name validation.
4528         * <p>
4529         *
4530         * @param scriptName A {@code UnicodeScript} name.
4531         * @return The {@code UnicodeScript} constant identified
4532         *         by {@code scriptName}
4533         * @throws IllegalArgumentException if {@code scriptName} is an
4534         *         invalid name
4535         * @throws NullPointerException if {@code scriptName} is null
4536         */
4537        public static final UnicodeScript forName(String scriptName) {
4538            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4539                                 //.replace(' ', '_'));
4540            UnicodeScript sc = aliases.get(scriptName);
4541            if (sc != null)
4542                return sc;
4543            return valueOf(scriptName);
4544        }
4545    }
4546
    /**
     * The value of the {@code Character}.
     * Declared {@code final}: it is assigned by the constructor and
     * cannot be changed afterwards.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier. Uses the serialVersionUID from
     * JDK 1.0.2 for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4556
4557    /**
4558     * Constructs a newly allocated {@code Character} object that
4559     * represents the specified {@code char} value.
4560     *
4561     * @param  value   the value to be represented by the
4562     *                  {@code Character} object.
4563     */
4564    public Character(char value) {
4565        this.value = value;
4566    }
4567
4568    private static class CharacterCache {
4569        private CharacterCache(){}
4570
4571        static final Character cache[] = new Character[127 + 1];
4572
4573        static {
4574            for (int i = 0; i < cache.length; i++)
4575                cache[i] = new Character((char)i);
4576        }
4577    }
4578
4579    /**
4580     * Returns a <tt>Character</tt> instance representing the specified
4581     * <tt>char</tt> value.
4582     * If a new <tt>Character</tt> instance is not required, this method
4583     * should generally be used in preference to the constructor
4584     * {@link #Character(char)}, as this method is likely to yield
4585     * significantly better space and time performance by caching
4586     * frequently requested values.
4587     *
4588     * This method will always cache values in the range {@code
4589     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4590     * cache other values outside of this range.
4591     *
4592     * @param  c a char value.
4593     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4594     * @since  1.5
4595     */
4596    public static Character valueOf(char c) {
4597        if (c <= 127) { // must cache
4598            return CharacterCache.cache[(int)c];
4599        }
4600        return new Character(c);
4601    }
4602
4603    /**
4604     * Returns the value of this {@code Character} object.
4605     * @return  the primitive {@code char} value represented by
4606     *          this object.
4607     */
4608    public char charValue() {
4609        return value;
4610    }
4611
4612    /**
4613     * Returns a hash code for this {@code Character}; equal to the result
4614     * of invoking {@code charValue()}.
4615     *
4616     * @return a hash code value for this {@code Character}
4617     */
4618    @Override
4619    public int hashCode() {
4620        return Character.hashCode(value);
4621    }
4622
4623    /**
4624     * Returns a hash code for a {@code char} value; compatible with
4625     * {@code Character.hashCode()}.
4626     *
4627     * @since 1.8
4628     *
4629     * @param value The {@code char} for which to return a hash code.
4630     * @return a hash code value for a {@code char} value.
4631     */
4632    public static int hashCode(char value) {
4633        return (int)value;
4634    }
4635
4636    /**
4637     * Compares this object against the specified object.
4638     * The result is {@code true} if and only if the argument is not
4639     * {@code null} and is a {@code Character} object that
4640     * represents the same {@code char} value as this object.
4641     *
4642     * @param   obj   the object to compare with.
4643     * @return  {@code true} if the objects are the same;
4644     *          {@code false} otherwise.
4645     */
4646    public boolean equals(Object obj) {
4647        if (obj instanceof Character) {
4648            return value == ((Character)obj).charValue();
4649        }
4650        return false;
4651    }
4652
4653    /**
4654     * Returns a {@code String} object representing this
4655     * {@code Character}'s value.  The result is a string of
4656     * length 1 whose sole component is the primitive
4657     * {@code char} value represented by this
4658     * {@code Character} object.
4659     *
4660     * @return  a string representation of this object.
4661     */
4662    public String toString() {
4663        char buf[] = {value};
4664        return String.valueOf(buf);
4665    }
4666
4667    /**
4668     * Returns a {@code String} object representing the
4669     * specified {@code char}.  The result is a string of length
4670     * 1 consisting solely of the specified {@code char}.
4671     *
4672     * @param c the {@code char} to be converted
4673     * @return the string representation of the specified {@code char}
4674     * @since 1.4
4675     */
4676    public static String toString(char c) {
4677        return String.valueOf(c);
4678    }
4679
4680    /**
4681     * Determines whether the specified code point is a valid
4682     * <a href="http://www.unicode.org/glossary/#code_point">
4683     * Unicode code point value</a>.
4684     *
4685     * @param  codePoint the Unicode code point to be tested
4686     * @return {@code true} if the specified code point value is between
4687     *         {@link #MIN_CODE_POINT} and
4688     *         {@link #MAX_CODE_POINT} inclusive;
4689     *         {@code false} otherwise.
4690     * @since  1.5
4691     */
4692    public static boolean isValidCodePoint(int codePoint) {
4693        // Optimized form of:
4694        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4695        int plane = codePoint >>> 16;
4696        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4697    }
4698
4699    /**
4700     * Determines whether the specified character (Unicode code point)
4701     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4702     * Such code points can be represented using a single {@code char}.
4703     *
4704     * @param  codePoint the character (Unicode code point) to be tested
4705     * @return {@code true} if the specified code point is between
4706     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4707     *         {@code false} otherwise.
4708     * @since  1.7
4709     */
4710    public static boolean isBmpCodePoint(int codePoint) {
4711        return codePoint >>> 16 == 0;
4712        // Optimized form of:
4713        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4714        // We consistently use logical shift (>>>) to facilitate
4715        // additional runtime optimizations.
4716    }
4717
4718    /**
4719     * Determines whether the specified character (Unicode code point)
4720     * is in the <a href="#supplementary">supplementary character</a> range.
4721     *
4722     * @param  codePoint the character (Unicode code point) to be tested
4723     * @return {@code true} if the specified code point is between
4724     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4725     *         {@link #MAX_CODE_POINT} inclusive;
4726     *         {@code false} otherwise.
4727     * @since  1.5
4728     */
4729    public static boolean isSupplementaryCodePoint(int codePoint) {
4730        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4731            && codePoint <  MAX_CODE_POINT + 1;
4732    }
4733
4734    /**
4735     * Determines if the given {@code char} value is a
4736     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4737     * Unicode high-surrogate code unit</a>
4738     * (also known as <i>leading-surrogate code unit</i>).
4739     *
4740     * <p>Such values do not represent characters by themselves,
4741     * but are used in the representation of
4742     * <a href="#supplementary">supplementary characters</a>
4743     * in the UTF-16 encoding.
4744     *
4745     * @param  ch the {@code char} value to be tested.
4746     * @return {@code true} if the {@code char} value is between
4747     *         {@link #MIN_HIGH_SURROGATE} and
4748     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4749     *         {@code false} otherwise.
4750     * @see    Character#isLowSurrogate(char)
4751     * @see    Character.UnicodeBlock#of(int)
4752     * @since  1.5
4753     */
4754    public static boolean isHighSurrogate(char ch) {
4755        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4756        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4757    }
4758
4759    /**
4760     * Determines if the given {@code char} value is a
4761     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4762     * Unicode low-surrogate code unit</a>
4763     * (also known as <i>trailing-surrogate code unit</i>).
4764     *
4765     * <p>Such values do not represent characters by themselves,
4766     * but are used in the representation of
4767     * <a href="#supplementary">supplementary characters</a>
4768     * in the UTF-16 encoding.
4769     *
4770     * @param  ch the {@code char} value to be tested.
4771     * @return {@code true} if the {@code char} value is between
4772     *         {@link #MIN_LOW_SURROGATE} and
4773     *         {@link #MAX_LOW_SURROGATE} inclusive;
4774     *         {@code false} otherwise.
4775     * @see    Character#isHighSurrogate(char)
4776     * @since  1.5
4777     */
4778    public static boolean isLowSurrogate(char ch) {
4779        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4780    }
4781
4782    /**
4783     * Determines if the given {@code char} value is a Unicode
4784     * <i>surrogate code unit</i>.
4785     *
4786     * <p>Such values do not represent characters by themselves,
4787     * but are used in the representation of
4788     * <a href="#supplementary">supplementary characters</a>
4789     * in the UTF-16 encoding.
4790     *
4791     * <p>A char value is a surrogate code unit if and only if it is either
4792     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4793     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4794     *
4795     * @param  ch the {@code char} value to be tested.
4796     * @return {@code true} if the {@code char} value is between
4797     *         {@link #MIN_SURROGATE} and
4798     *         {@link #MAX_SURROGATE} inclusive;
4799     *         {@code false} otherwise.
4800     * @since  1.7
4801     */
4802    public static boolean isSurrogate(char ch) {
4803        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4804    }
4805
4806    /**
4807     * Determines whether the specified pair of {@code char}
4808     * values is a valid
4809     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4810     * Unicode surrogate pair</a>.
4811
4812     * <p>This method is equivalent to the expression:
4813     * <blockquote><pre>{@code
4814     * isHighSurrogate(high) && isLowSurrogate(low)
4815     * }</pre></blockquote>
4816     *
4817     * @param  high the high-surrogate code value to be tested
4818     * @param  low the low-surrogate code value to be tested
4819     * @return {@code true} if the specified high and
4820     * low-surrogate code values represent a valid surrogate pair;
4821     * {@code false} otherwise.
4822     * @since  1.5
4823     */
4824    public static boolean isSurrogatePair(char high, char low) {
4825        return isHighSurrogate(high) && isLowSurrogate(low);
4826    }
4827
4828    /**
4829     * Determines the number of {@code char} values needed to
4830     * represent the specified character (Unicode code point). If the
4831     * specified character is equal to or greater than 0x10000, then
4832     * the method returns 2. Otherwise, the method returns 1.
4833     *
4834     * <p>This method doesn't validate the specified character to be a
4835     * valid Unicode code point. The caller must validate the
4836     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4837     * if necessary.
4838     *
4839     * @param   codePoint the character (Unicode code point) to be tested.
4840     * @return  2 if the character is a valid supplementary character; 1 otherwise.
4841     * @see     Character#isSupplementaryCodePoint(int)
4842     * @since   1.5
4843     */
4844    public static int charCount(int codePoint) {
4845        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4846    }
4847
4848    /**
4849     * Converts the specified surrogate pair to its supplementary code
4850     * point value. This method does not validate the specified
4851     * surrogate pair. The caller must validate it using {@link
4852     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4853     *
4854     * @param  high the high-surrogate code unit
4855     * @param  low the low-surrogate code unit
4856     * @return the supplementary code point composed from the
4857     *         specified surrogate pair.
4858     * @since  1.5
4859     */
4860    public static int toCodePoint(char high, char low) {
4861        // Optimized form of:
4862        // return ((high - MIN_HIGH_SURROGATE) << 10)
4863        //         + (low - MIN_LOW_SURROGATE)
4864        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4865        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4866                                       - (MIN_HIGH_SURROGATE << 10)
4867                                       - MIN_LOW_SURROGATE);
4868    }
4869
4870    /**
4871     * Returns the code point at the given index of the
4872     * {@code CharSequence}. If the {@code char} value at
4873     * the given index in the {@code CharSequence} is in the
4874     * high-surrogate range, the following index is less than the
4875     * length of the {@code CharSequence}, and the
4876     * {@code char} value at the following index is in the
4877     * low-surrogate range, then the supplementary code point
4878     * corresponding to this surrogate pair is returned. Otherwise,
4879     * the {@code char} value at the given index is returned.
4880     *
4881     * @param seq a sequence of {@code char} values (Unicode code
4882     * units)
4883     * @param index the index to the {@code char} values (Unicode
4884     * code units) in {@code seq} to be converted
4885     * @return the Unicode code point at the given index
4886     * @exception NullPointerException if {@code seq} is null.
4887     * @exception IndexOutOfBoundsException if the value
4888     * {@code index} is negative or not less than
4889     * {@link CharSequence#length() seq.length()}.
4890     * @since  1.5
4891     */
4892    public static int codePointAt(CharSequence seq, int index) {
4893        char c1 = seq.charAt(index);
4894        if (isHighSurrogate(c1) && ++index < seq.length()) {
4895            char c2 = seq.charAt(index);
4896            if (isLowSurrogate(c2)) {
4897                return toCodePoint(c1, c2);
4898            }
4899        }
4900        return c1;
4901    }
4902
4903    /**
4904     * Returns the code point at the given index of the
4905     * {@code char} array. If the {@code char} value at
4906     * the given index in the {@code char} array is in the
4907     * high-surrogate range, the following index is less than the
4908     * length of the {@code char} array, and the
4909     * {@code char} value at the following index is in the
4910     * low-surrogate range, then the supplementary code point
4911     * corresponding to this surrogate pair is returned. Otherwise,
4912     * the {@code char} value at the given index is returned.
4913     *
4914     * @param a the {@code char} array
4915     * @param index the index to the {@code char} values (Unicode
4916     * code units) in the {@code char} array to be converted
4917     * @return the Unicode code point at the given index
4918     * @exception NullPointerException if {@code a} is null.
4919     * @exception IndexOutOfBoundsException if the value
4920     * {@code index} is negative or not less than
4921     * the length of the {@code char} array.
4922     * @since  1.5
4923     */
4924    public static int codePointAt(char[] a, int index) {
4925        return codePointAtImpl(a, index, a.length);
4926    }
4927
4928    /**
4929     * Returns the code point at the given index of the
4930     * {@code char} array, where only array elements with
4931     * {@code index} less than {@code limit} can be used. If
4932     * the {@code char} value at the given index in the
4933     * {@code char} array is in the high-surrogate range, the
4934     * following index is less than the {@code limit}, and the
4935     * {@code char} value at the following index is in the
4936     * low-surrogate range, then the supplementary code point
4937     * corresponding to this surrogate pair is returned. Otherwise,
4938     * the {@code char} value at the given index is returned.
4939     *
4940     * @param a the {@code char} array
4941     * @param index the index to the {@code char} values (Unicode
4942     * code units) in the {@code char} array to be converted
4943     * @param limit the index after the last array element that
4944     * can be used in the {@code char} array
4945     * @return the Unicode code point at the given index
4946     * @exception NullPointerException if {@code a} is null.
4947     * @exception IndexOutOfBoundsException if the {@code index}
4948     * argument is negative or not less than the {@code limit}
4949     * argument, or if the {@code limit} argument is negative or
4950     * greater than the length of the {@code char} array.
4951     * @since  1.5
4952     */
4953    public static int codePointAt(char[] a, int index, int limit) {
4954        if (index >= limit || limit < 0 || limit > a.length) {
4955            throw new IndexOutOfBoundsException();
4956        }
4957        return codePointAtImpl(a, index, limit);
4958    }
4959
4960    // throws ArrayIndexOutOfBoundsException if index out of bounds
4961    static int codePointAtImpl(char[] a, int index, int limit) {
4962        char c1 = a[index];
4963        if (isHighSurrogate(c1) && ++index < limit) {
4964            char c2 = a[index];
4965            if (isLowSurrogate(c2)) {
4966                return toCodePoint(c1, c2);
4967            }
4968        }
4969        return c1;
4970    }
4971
4972    /**
4973     * Returns the code point preceding the given index of the
4974     * {@code CharSequence}. If the {@code char} value at
4975     * {@code (index - 1)} in the {@code CharSequence} is in
4976     * the low-surrogate range, {@code (index - 2)} is not
4977     * negative, and the {@code char} value at {@code (index - 2)}
4978     * in the {@code CharSequence} is in the
4979     * high-surrogate range, then the supplementary code point
4980     * corresponding to this surrogate pair is returned. Otherwise,
4981     * the {@code char} value at {@code (index - 1)} is
4982     * returned.
4983     *
4984     * @param seq the {@code CharSequence} instance
4985     * @param index the index following the code point that should be returned
4986     * @return the Unicode code point value before the given index.
4987     * @exception NullPointerException if {@code seq} is null.
4988     * @exception IndexOutOfBoundsException if the {@code index}
4989     * argument is less than 1 or greater than {@link
4990     * CharSequence#length() seq.length()}.
4991     * @since  1.5
4992     */
4993    public static int codePointBefore(CharSequence seq, int index) {
4994        char c2 = seq.charAt(--index);
4995        if (isLowSurrogate(c2) && index > 0) {
4996            char c1 = seq.charAt(--index);
4997            if (isHighSurrogate(c1)) {
4998                return toCodePoint(c1, c2);
4999            }
5000        }
5001        return c2;
5002    }
5003
5004    /**
5005     * Returns the code point preceding the given index of the
5006     * {@code char} array. If the {@code char} value at
5007     * {@code (index - 1)} in the {@code char} array is in
5008     * the low-surrogate range, {@code (index - 2)} is not
5009     * negative, and the {@code char} value at {@code (index - 2)}
5010     * in the {@code char} array is in the
5011     * high-surrogate range, then the supplementary code point
5012     * corresponding to this surrogate pair is returned. Otherwise,
5013     * the {@code char} value at {@code (index - 1)} is
5014     * returned.
5015     *
5016     * @param a the {@code char} array
5017     * @param index the index following the code point that should be returned
5018     * @return the Unicode code point value before the given index.
5019     * @exception NullPointerException if {@code a} is null.
5020     * @exception IndexOutOfBoundsException if the {@code index}
5021     * argument is less than 1 or greater than the length of the
5022     * {@code char} array
5023     * @since  1.5
5024     */
5025    public static int codePointBefore(char[] a, int index) {
5026        return codePointBeforeImpl(a, index, 0);
5027    }
5028
5029    /**
5030     * Returns the code point preceding the given index of the
5031     * {@code char} array, where only array elements with
5032     * {@code index} greater than or equal to {@code start}
5033     * can be used. If the {@code char} value at {@code (index - 1)}
5034     * in the {@code char} array is in the
5035     * low-surrogate range, {@code (index - 2)} is not less than
5036     * {@code start}, and the {@code char} value at
5037     * {@code (index - 2)} in the {@code char} array is in
5038     * the high-surrogate range, then the supplementary code point
5039     * corresponding to this surrogate pair is returned. Otherwise,
5040     * the {@code char} value at {@code (index - 1)} is
5041     * returned.
5042     *
5043     * @param a the {@code char} array
5044     * @param index the index following the code point that should be returned
5045     * @param start the index of the first array element in the
5046     * {@code char} array
5047     * @return the Unicode code point value before the given index.
5048     * @exception NullPointerException if {@code a} is null.
5049     * @exception IndexOutOfBoundsException if the {@code index}
5050     * argument is not greater than the {@code start} argument or
5051     * is greater than the length of the {@code char} array, or
5052     * if the {@code start} argument is negative or not less than
5053     * the length of the {@code char} array.
5054     * @since  1.5
5055     */
5056    public static int codePointBefore(char[] a, int index, int start) {
5057        if (index <= start || start < 0 || start >= a.length) {
5058            throw new IndexOutOfBoundsException();
5059        }
5060        return codePointBeforeImpl(a, index, start);
5061    }
5062
5063    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5064    static int codePointBeforeImpl(char[] a, int index, int start) {
5065        char c2 = a[--index];
5066        if (isLowSurrogate(c2) && index > start) {
5067            char c1 = a[--index];
5068            if (isHighSurrogate(c1)) {
5069                return toCodePoint(c1, c2);
5070            }
5071        }
5072        return c2;
5073    }
5074
5075    /**
5076     * Returns the leading surrogate (a
5077     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5078     * high surrogate code unit</a>) of the
5079     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5080     * surrogate pair</a>
5081     * representing the specified supplementary character (Unicode
5082     * code point) in the UTF-16 encoding.  If the specified character
5083     * is not a
5084     * <a href="Character.html#supplementary">supplementary character</a>,
5085     * an unspecified {@code char} is returned.
5086     *
5087     * <p>If
5088     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5089     * is {@code true}, then
5090     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5091     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5092     * are also always {@code true}.
5093     *
5094     * @param   codePoint a supplementary character (Unicode code point)
5095     * @return  the leading surrogate code unit used to represent the
5096     *          character in the UTF-16 encoding
5097     * @since   1.7
5098     */
5099    public static char highSurrogate(int codePoint) {
5100        return (char) ((codePoint >>> 10)
5101            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5102    }
5103
5104    /**
5105     * Returns the trailing surrogate (a
5106     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5107     * low surrogate code unit</a>) of the
5108     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5109     * surrogate pair</a>
5110     * representing the specified supplementary character (Unicode
5111     * code point) in the UTF-16 encoding.  If the specified character
5112     * is not a
5113     * <a href="Character.html#supplementary">supplementary character</a>,
5114     * an unspecified {@code char} is returned.
5115     *
5116     * <p>If
5117     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5118     * is {@code true}, then
5119     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5120     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5121     * are also always {@code true}.
5122     *
5123     * @param   codePoint a supplementary character (Unicode code point)
5124     * @return  the trailing surrogate code unit used to represent the
5125     *          character in the UTF-16 encoding
5126     * @since   1.7
5127     */
5128    public static char lowSurrogate(int codePoint) {
5129        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5130    }
5131
5132    /**
5133     * Converts the specified character (Unicode code point) to its
5134     * UTF-16 representation. If the specified code point is a BMP
5135     * (Basic Multilingual Plane or Plane 0) value, the same value is
5136     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5137     * specified code point is a supplementary character, its
5138     * surrogate values are stored in {@code dst[dstIndex]}
5139     * (high-surrogate) and {@code dst[dstIndex+1]}
5140     * (low-surrogate), and 2 is returned.
5141     *
5142     * @param  codePoint the character (Unicode code point) to be converted.
5143     * @param  dst an array of {@code char} in which the
5144     * {@code codePoint}'s UTF-16 value is stored.
5145     * @param dstIndex the start index into the {@code dst}
5146     * array where the converted value is stored.
5147     * @return 1 if the code point is a BMP code point, 2 if the
5148     * code point is a supplementary code point.
5149     * @exception IllegalArgumentException if the specified
5150     * {@code codePoint} is not a valid Unicode code point.
5151     * @exception NullPointerException if the specified {@code dst} is null.
5152     * @exception IndexOutOfBoundsException if {@code dstIndex}
5153     * is negative or not less than {@code dst.length}, or if
5154     * {@code dst} at {@code dstIndex} doesn't have enough
5155     * array element(s) to store the resulting {@code char}
5156     * value(s). (If {@code dstIndex} is equal to
5157     * {@code dst.length-1} and the specified
5158     * {@code codePoint} is a supplementary character, the
5159     * high-surrogate value is not stored in
5160     * {@code dst[dstIndex]}.)
5161     * @since  1.5
5162     */
5163    public static int toChars(int codePoint, char[] dst, int dstIndex) {
5164        if (isBmpCodePoint(codePoint)) {
5165            dst[dstIndex] = (char) codePoint;
5166            return 1;
5167        } else if (isValidCodePoint(codePoint)) {
5168            toSurrogates(codePoint, dst, dstIndex);
5169            return 2;
5170        } else {
5171            throw new IllegalArgumentException();
5172        }
5173    }
5174
5175    /**
5176     * Converts the specified character (Unicode code point) to its
5177     * UTF-16 representation stored in a {@code char} array. If
5178     * the specified code point is a BMP (Basic Multilingual Plane or
5179     * Plane 0) value, the resulting {@code char} array has
5180     * the same value as {@code codePoint}. If the specified code
5181     * point is a supplementary code point, the resulting
5182     * {@code char} array has the corresponding surrogate pair.
5183     *
5184     * @param  codePoint a Unicode code point
5185     * @return a {@code char} array having
5186     *         {@code codePoint}'s UTF-16 representation.
5187     * @exception IllegalArgumentException if the specified
5188     * {@code codePoint} is not a valid Unicode code point.
5189     * @since  1.5
5190     */
5191    public static char[] toChars(int codePoint) {
5192        if (isBmpCodePoint(codePoint)) {
5193            return new char[] { (char) codePoint };
5194        } else if (isValidCodePoint(codePoint)) {
5195            char[] result = new char[2];
5196            toSurrogates(codePoint, result, 0);
5197            return result;
5198        } else {
5199            throw new IllegalArgumentException();
5200        }
5201    }
5202
5203    static void toSurrogates(int codePoint, char[] dst, int index) {
5204        // We write elements "backwards" to guarantee all-or-nothing
5205        dst[index+1] = lowSurrogate(codePoint);
5206        dst[index] = highSurrogate(codePoint);
5207    }
5208
5209    /**
5210     * Returns the number of Unicode code points in the text range of
5211     * the specified char sequence. The text range begins at the
5212     * specified {@code beginIndex} and extends to the
5213     * {@code char} at index {@code endIndex - 1}. Thus the
5214     * length (in {@code char}s) of the text range is
5215     * {@code endIndex-beginIndex}. Unpaired surrogates within
5216     * the text range count as one code point each.
5217     *
5218     * @param seq the char sequence
5219     * @param beginIndex the index to the first {@code char} of
5220     * the text range.
5221     * @param endIndex the index after the last {@code char} of
5222     * the text range.
5223     * @return the number of Unicode code points in the specified text
5224     * range
5225     * @exception NullPointerException if {@code seq} is null.
5226     * @exception IndexOutOfBoundsException if the
5227     * {@code beginIndex} is negative, or {@code endIndex}
5228     * is larger than the length of the given sequence, or
5229     * {@code beginIndex} is larger than {@code endIndex}.
5230     * @since  1.5
5231     */
5232    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5233        int length = seq.length();
5234        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5235            throw new IndexOutOfBoundsException();
5236        }
5237        int n = endIndex - beginIndex;
5238        for (int i = beginIndex; i < endIndex; ) {
5239            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5240                isLowSurrogate(seq.charAt(i))) {
5241                n--;
5242                i++;
5243            }
5244        }
5245        return n;
5246    }
5247
5248    /**
5249     * Returns the number of Unicode code points in a subarray of the
5250     * {@code char} array argument. The {@code offset}
5251     * argument is the index of the first {@code char} of the
5252     * subarray and the {@code count} argument specifies the
5253     * length of the subarray in {@code char}s. Unpaired
5254     * surrogates within the subarray count as one code point each.
5255     *
5256     * @param a the {@code char} array
5257     * @param offset the index of the first {@code char} in the
5258     * given {@code char} array
5259     * @param count the length of the subarray in {@code char}s
5260     * @return the number of Unicode code points in the specified subarray
5261     * @exception NullPointerException if {@code a} is null.
5262     * @exception IndexOutOfBoundsException if {@code offset} or
5263     * {@code count} is negative, or if {@code offset +
5264     * count} is larger than the length of the given array.
5265     * @since  1.5
5266     */
5267    public static int codePointCount(char[] a, int offset, int count) {
5268        if (count > a.length - offset || offset < 0 || count < 0) {
5269            throw new IndexOutOfBoundsException();
5270        }
5271        return codePointCountImpl(a, offset, count);
5272    }
5273
5274    static int codePointCountImpl(char[] a, int offset, int count) {
5275        int endIndex = offset + count;
5276        int n = count;
5277        for (int i = offset; i < endIndex; ) {
5278            if (isHighSurrogate(a[i++]) && i < endIndex &&
5279                isLowSurrogate(a[i])) {
5280                n--;
5281                i++;
5282            }
5283        }
5284        return n;
5285    }
5286
5287    /**
5288     * Returns the index within the given char sequence that is offset
5289     * from the given {@code index} by {@code codePointOffset}
5290     * code points. Unpaired surrogates within the text range given by
5291     * {@code index} and {@code codePointOffset} count as
5292     * one code point each.
5293     *
5294     * @param seq the char sequence
5295     * @param index the index to be offset
5296     * @param codePointOffset the offset in code points
5297     * @return the index within the char sequence
5298     * @exception NullPointerException if {@code seq} is null.
5299     * @exception IndexOutOfBoundsException if {@code index}
5300     *   is negative or larger than the length of the char sequence,
5301     *   or if {@code codePointOffset} is positive and the
5302     *   subsequence starting with {@code index} has fewer than
5303     *   {@code codePointOffset} code points, or if
5304     *   {@code codePointOffset} is negative and the subsequence
5305     *   before {@code index} has fewer than the absolute value
5306     *   of {@code codePointOffset} code points.
5307     * @since 1.5
5308     */
5309    public static int offsetByCodePoints(CharSequence seq, int index,
5310                                         int codePointOffset) {
5311        int length = seq.length();
5312        if (index < 0 || index > length) {
5313            throw new IndexOutOfBoundsException();
5314        }
5315
5316        int x = index;
5317        if (codePointOffset >= 0) {
5318            int i;
5319            for (i = 0; x < length && i < codePointOffset; i++) {
5320                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5321                    isLowSurrogate(seq.charAt(x))) {
5322                    x++;
5323                }
5324            }
5325            if (i < codePointOffset) {
5326                throw new IndexOutOfBoundsException();
5327            }
5328        } else {
5329            int i;
5330            for (i = codePointOffset; x > 0 && i < 0; i++) {
5331                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5332                    isHighSurrogate(seq.charAt(x-1))) {
5333                    x--;
5334                }
5335            }
5336            if (i < 0) {
5337                throw new IndexOutOfBoundsException();
5338            }
5339        }
5340        return x;
5341    }
5342
5343    /**
5344     * Returns the index within the given {@code char} subarray
5345     * that is offset from the given {@code index} by
5346     * {@code codePointOffset} code points. The
5347     * {@code start} and {@code count} arguments specify a
5348     * subarray of the {@code char} array. Unpaired surrogates
5349     * within the text range given by {@code index} and
5350     * {@code codePointOffset} count as one code point each.
5351     *
5352     * @param a the {@code char} array
5353     * @param start the index of the first {@code char} of the
5354     * subarray
5355     * @param count the length of the subarray in {@code char}s
5356     * @param index the index to be offset
5357     * @param codePointOffset the offset in code points
5358     * @return the index within the subarray
5359     * @exception NullPointerException if {@code a} is null.
5360     * @exception IndexOutOfBoundsException
5361     *   if {@code start} or {@code count} is negative,
5362     *   or if {@code start + count} is larger than the length of
5363     *   the given array,
5364     *   or if {@code index} is less than {@code start} or
5365     *   larger than {@code start + count},
5366     *   or if {@code codePointOffset} is positive and the text range
5367     *   starting with {@code index} and ending with {@code start + count - 1}
5368     *   has fewer than {@code codePointOffset} code
5369     *   points,
5370     *   or if {@code codePointOffset} is negative and the text range
5371     *   starting with {@code start} and ending with {@code index - 1}
5372     *   has fewer than the absolute value of
5373     *   {@code codePointOffset} code points.
5374     * @since 1.5
5375     */
5376    public static int offsetByCodePoints(char[] a, int start, int count,
5377                                         int index, int codePointOffset) {
5378        if (count > a.length-start || start < 0 || count < 0
5379            || index < start || index > start+count) {
5380            throw new IndexOutOfBoundsException();
5381        }
5382        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5383    }
5384
5385    static int offsetByCodePointsImpl(char[]a, int start, int count,
5386                                      int index, int codePointOffset) {
5387        int x = index;
5388        if (codePointOffset >= 0) {
5389            int limit = start + count;
5390            int i;
5391            for (i = 0; x < limit && i < codePointOffset; i++) {
5392                if (isHighSurrogate(a[x++]) && x < limit &&
5393                    isLowSurrogate(a[x])) {
5394                    x++;
5395                }
5396            }
5397            if (i < codePointOffset) {
5398                throw new IndexOutOfBoundsException();
5399            }
5400        } else {
5401            int i;
5402            for (i = codePointOffset; x > start && i < 0; i++) {
5403                if (isLowSurrogate(a[--x]) && x > start &&
5404                    isHighSurrogate(a[x-1])) {
5405                    x--;
5406                }
5407            }
5408            if (i < 0) {
5409                throw new IndexOutOfBoundsException();
5410            }
5411        }
5412        return x;
5413    }
5414
5415    /**
5416     * Determines if the specified character is a lowercase character.
5417     * <p>
5418     * A character is lowercase if its general category type, provided
5419     * by {@code Character.getType(ch)}, is
5420     * {@code LOWERCASE_LETTER}, or it has contributory property
5421     * Other_Lowercase as defined by the Unicode Standard.
5422     * <p>
5423     * The following are examples of lowercase characters:
5424     * <blockquote><pre>
5425     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5426     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5427     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5428     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5429     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5430     * </pre></blockquote>
5431     * <p> Many other Unicode characters are lowercase too.
5432     *
5433     * <p><b>Note:</b> This method cannot handle <a
5434     * href="#supplementary"> supplementary characters</a>. To support
5435     * all Unicode characters, including supplementary characters, use
5436     * the {@link #isLowerCase(int)} method.
5437     *
5438     * @param   ch   the character to be tested.
5439     * @return  {@code true} if the character is lowercase;
5440     *          {@code false} otherwise.
5441     * @see     Character#isUpperCase(char)
5442     * @see     Character#isTitleCase(char)
5443     * @see     Character#toLowerCase(char)
5444     * @see     Character#getType(char)
5445     */
5446    public static boolean isLowerCase(char ch) {
5447        return isLowerCase((int)ch);
5448    }
5449
5450    /**
5451     * Determines if the specified character (Unicode code point) is a
5452     * lowercase character.
5453     * <p>
5454     * A character is lowercase if its general category type, provided
5455     * by {@link Character#getType getType(codePoint)}, is
5456     * {@code LOWERCASE_LETTER}, or it has contributory property
5457     * Other_Lowercase as defined by the Unicode Standard.
5458     * <p>
5459     * The following are examples of lowercase characters:
5460     * <blockquote><pre>
5461     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5462     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5463     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5464     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5465     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5466     * </pre></blockquote>
5467     * <p> Many other Unicode characters are lowercase too.
5468     *
5469     * @param   codePoint the character (Unicode code point) to be tested.
5470     * @return  {@code true} if the character is lowercase;
5471     *          {@code false} otherwise.
5472     * @see     Character#isUpperCase(int)
5473     * @see     Character#isTitleCase(int)
5474     * @see     Character#toLowerCase(int)
5475     * @see     Character#getType(int)
5476     * @since   1.5
5477     */
5478    public static boolean isLowerCase(int codePoint) {
5479        return isLowerCaseImpl(codePoint);
5480    }
5481
5482    @FastNative
    // Native method behind isLowerCase(int): that method passes its code
    // point argument here unchanged and returns this method's result as is.
    // There is no Java body; the implementation is provided natively.
5483    static native boolean isLowerCaseImpl(int codePoint);
5484
5485    /**
5486     * Determines if the specified character is an uppercase character.
5487     * <p>
5488     * A character is uppercase if its general category type, provided by
5489     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5490     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5491     * <p>
5492     * The following are examples of uppercase characters:
5493     * <blockquote><pre>
5494     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5495     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5496     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5497     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5498     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5499     * </pre></blockquote>
5500     * <p> Many other Unicode characters are uppercase too.
5501     *
5502     * <p><b>Note:</b> This method cannot handle <a
5503     * href="#supplementary"> supplementary characters</a>. To support
5504     * all Unicode characters, including supplementary characters, use
5505     * the {@link #isUpperCase(int)} method.
5506     *
5507     * @param   ch   the character to be tested.
5508     * @return  {@code true} if the character is uppercase;
5509     *          {@code false} otherwise.
5510     * @see     Character#isLowerCase(char)
5511     * @see     Character#isTitleCase(char)
5512     * @see     Character#toUpperCase(char)
5513     * @see     Character#getType(char)
5514     * @since   1.0
5515     */
5516    public static boolean isUpperCase(char ch) {
5517        return isUpperCase((int)ch);
5518    }
5519
5520    /**
5521     * Determines if the specified character (Unicode code point) is an uppercase character.
5522     * <p>
5523     * A character is uppercase if its general category type, provided by
5524     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5525     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5526     * <p>
5527     * The following are examples of uppercase characters:
5528     * <blockquote><pre>
5529     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5530     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5531     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5532     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5533     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5534     * </pre></blockquote>
5535     * <p> Many other Unicode characters are uppercase too.
5536     *
5537     * @param   codePoint the character (Unicode code point) to be tested.
5538     * @return  {@code true} if the character is uppercase;
5539     *          {@code false} otherwise.
5540     * @see     Character#isLowerCase(int)
5541     * @see     Character#isTitleCase(int)
5542     * @see     Character#toUpperCase(int)
5543     * @see     Character#getType(int)
5544     * @since   1.5
5545     */
5546    public static boolean isUpperCase(int codePoint) {
5547        return isUpperCaseImpl(codePoint);
5548    }
5549
5550    @FastNative
    // Native method behind isUpperCase(int): that method passes its code
    // point argument here unchanged and returns this method's result as is.
    // There is no Java body; the implementation is provided natively.
5551    static native boolean isUpperCaseImpl(int codePoint);
5552
5553
5554    /**
5555     * Determines if the specified character is a titlecase character.
5556     * <p>
5557     * A character is a titlecase character if its general
5558     * category type, provided by {@code Character.getType(ch)},
5559     * is {@code TITLECASE_LETTER}.
5560     * <p>
5561     * Some characters look like pairs of Latin letters. For example, there
5562     * is an uppercase letter that looks like "LJ" and has a corresponding
5563     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5564     * is the appropriate form to use when rendering a word in lowercase
5565     * with initial capitals, as for a book title.
5566     * <p>
5567     * These are some of the Unicode characters for which this method returns
5568     * {@code true}:
5569     * <ul>
5570     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5571     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5572     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5573     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5574     * </ul>
5575     * <p> Many other Unicode characters are titlecase too.
5576     *
5577     * <p><b>Note:</b> This method cannot handle <a
5578     * href="#supplementary"> supplementary characters</a>. To support
5579     * all Unicode characters, including supplementary characters, use
5580     * the {@link #isTitleCase(int)} method.
5581     *
5582     * @param   ch   the character to be tested.
5583     * @return  {@code true} if the character is titlecase;
5584     *          {@code false} otherwise.
5585     * @see     Character#isLowerCase(char)
5586     * @see     Character#isUpperCase(char)
5587     * @see     Character#toTitleCase(char)
5588     * @see     Character#getType(char)
5589     * @since   1.0.2
5590     */
5591    public static boolean isTitleCase(char ch) {
5592        return isTitleCase((int)ch);
5593    }
5594
5595    /**
5596     * Determines if the specified character (Unicode code point) is a titlecase character.
5597     * <p>
5598     * A character is a titlecase character if its general
5599     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5600     * is {@code TITLECASE_LETTER}.
5601     * <p>
5602     * Some characters look like pairs of Latin letters. For example, there
5603     * is an uppercase letter that looks like "LJ" and has a corresponding
5604     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5605     * is the appropriate form to use when rendering a word in lowercase
5606     * with initial capitals, as for a book title.
5607     * <p>
5608     * These are some of the Unicode characters for which this method returns
5609     * {@code true}:
5610     * <ul>
5611     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5612     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5613     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5614     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5615     * </ul>
5616     * <p> Many other Unicode characters are titlecase too.
5617     *
5618     * @param   codePoint the character (Unicode code point) to be tested.
5619     * @return  {@code true} if the character is titlecase;
5620     *          {@code false} otherwise.
5621     * @see     Character#isLowerCase(int)
5622     * @see     Character#isUpperCase(int)
5623     * @see     Character#toTitleCase(int)
5624     * @see     Character#getType(int)
5625     * @since   1.5
5626     */
5627    public static boolean isTitleCase(int codePoint) {
5628        return isTitleCaseImpl(codePoint);
5629    }
5630
5631    @FastNative
    // Native method behind isTitleCase(int): that method passes its code
    // point argument here unchanged and returns this method's result as is.
    // There is no Java body; the implementation is provided natively.
5632    static native boolean isTitleCaseImpl(int codePoint);
5633
5634    /**
5635     * Determines if the specified character is a digit.
5636     * <p>
5637     * A character is a digit if its general category type, provided
5638     * by {@code Character.getType(ch)}, is
5639     * {@code DECIMAL_DIGIT_NUMBER}.
5640     * <p>
5641     * Some Unicode character ranges that contain digits:
5642     * <ul>
5643     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5644     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5645     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5646     *     Arabic-Indic digits
5647     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5648     *     Extended Arabic-Indic digits
5649     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5650     *     Devanagari digits
5651     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5652     *     Fullwidth digits
5653     * </ul>
5654     *
5655     * Many other character ranges contain digits as well.
5656     *
5657     * <p><b>Note:</b> This method cannot handle <a
5658     * href="#supplementary"> supplementary characters</a>. To support
5659     * all Unicode characters, including supplementary characters, use
5660     * the {@link #isDigit(int)} method.
5661     *
5662     * @param   ch   the character to be tested.
5663     * @return  {@code true} if the character is a digit;
5664     *          {@code false} otherwise.
5665     * @see     Character#digit(char, int)
5666     * @see     Character#forDigit(int, int)
5667     * @see     Character#getType(char)
5668     */
5669    public static boolean isDigit(char ch) {
5670        return isDigit((int)ch);
5671    }
5672
5673    /**
5674     * Determines if the specified character (Unicode code point) is a digit.
5675     * <p>
5676     * A character is a digit if its general category type, provided
5677     * by {@link Character#getType(int) getType(codePoint)}, is
5678     * {@code DECIMAL_DIGIT_NUMBER}.
5679     * <p>
5680     * Some Unicode character ranges that contain digits:
5681     * <ul>
5682     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5683     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5684     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5685     *     Arabic-Indic digits
5686     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5687     *     Extended Arabic-Indic digits
5688     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5689     *     Devanagari digits
5690     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5691     *     Fullwidth digits
5692     * </ul>
5693     *
5694     * Many other character ranges contain digits as well.
5695     *
5696     * @param   codePoint the character (Unicode code point) to be tested.
5697     * @return  {@code true} if the character is a digit;
5698     *          {@code false} otherwise.
5699     * @see     Character#forDigit(int, int)
5700     * @see     Character#getType(int)
5701     * @since   1.5
5702     */
5703    public static boolean isDigit(int codePoint) {
5704        return isDigitImpl(codePoint);
5705    }
5706
5707    @FastNative
    // Native method behind isDigit(int): that method passes its code point
    // argument here unchanged and returns this method's result as is.
    // There is no Java body; the implementation is provided natively.
5708    static native boolean isDigitImpl(int codePoint);
5709
5710    /**
5711     * Determines if a character is defined in Unicode.
5712     * <p>
5713     * A character is defined if at least one of the following is true:
5714     * <ul>
5715     * <li>It has an entry in the UnicodeData file.
5716     * <li>It has a value in a range defined by the UnicodeData file.
5717     * </ul>
5718     *
5719     * <p><b>Note:</b> This method cannot handle <a
5720     * href="#supplementary"> supplementary characters</a>. To support
5721     * all Unicode characters, including supplementary characters, use
5722     * the {@link #isDefined(int)} method.
5723     *
5724     * @param   ch   the character to be tested
5725     * @return  {@code true} if the character has a defined meaning
5726     *          in Unicode; {@code false} otherwise.
5727     * @see     Character#isDigit(char)
5728     * @see     Character#isLetter(char)
5729     * @see     Character#isLetterOrDigit(char)
5730     * @see     Character#isLowerCase(char)
5731     * @see     Character#isTitleCase(char)
5732     * @see     Character#isUpperCase(char)
5733     * @since   1.0.2
5734     */
5735    public static boolean isDefined(char ch) {
5736        return isDefined((int)ch);
5737    }
5738
5739    /**
5740     * Determines if a character (Unicode code point) is defined in Unicode.
5741     * <p>
5742     * A character is defined if at least one of the following is true:
5743     * <ul>
5744     * <li>It has an entry in the UnicodeData file.
5745     * <li>It has a value in a range defined by the UnicodeData file.
5746     * </ul>
5747     *
5748     * @param   codePoint the character (Unicode code point) to be tested.
5749     * @return  {@code true} if the character has a defined meaning
5750     *          in Unicode; {@code false} otherwise.
5751     * @see     Character#isDigit(int)
5752     * @see     Character#isLetter(int)
5753     * @see     Character#isLetterOrDigit(int)
5754     * @see     Character#isLowerCase(int)
5755     * @see     Character#isTitleCase(int)
5756     * @see     Character#isUpperCase(int)
5757     * @since   1.5
5758     */
5759    public static boolean isDefined(int codePoint) {
5760        return isDefinedImpl(codePoint);
5761    }
5762
5763    @FastNative
5764    static native boolean isDefinedImpl(int codePoint);
5765
5766    /**
5767     * Determines if the specified character is a letter.
5768     * <p>
5769     * A character is considered to be a letter if its general
5770     * category type, provided by {@code Character.getType(ch)},
5771     * is any of the following:
5772     * <ul>
5773     * <li> {@code UPPERCASE_LETTER}
5774     * <li> {@code LOWERCASE_LETTER}
5775     * <li> {@code TITLECASE_LETTER}
5776     * <li> {@code MODIFIER_LETTER}
5777     * <li> {@code OTHER_LETTER}
5778     * </ul>
5779     *
5780     * Not all letters have case. Many characters are
5781     * letters but are neither uppercase nor lowercase nor titlecase.
5782     *
5783     * <p><b>Note:</b> This method cannot handle <a
5784     * href="#supplementary"> supplementary characters</a>. To support
5785     * all Unicode characters, including supplementary characters, use
5786     * the {@link #isLetter(int)} method.
5787     *
5788     * @param   ch   the character to be tested.
5789     * @return  {@code true} if the character is a letter;
5790     *          {@code false} otherwise.
5791     * @see     Character#isDigit(char)
5792     * @see     Character#isJavaIdentifierStart(char)
5793     * @see     Character#isJavaLetter(char)
5794     * @see     Character#isJavaLetterOrDigit(char)
5795     * @see     Character#isLetterOrDigit(char)
5796     * @see     Character#isLowerCase(char)
5797     * @see     Character#isTitleCase(char)
5798     * @see     Character#isUnicodeIdentifierStart(char)
5799     * @see     Character#isUpperCase(char)
5800     */
5801    public static boolean isLetter(char ch) {
5802        return isLetter((int)ch);
5803    }
5804
5805    /**
5806     * Determines if the specified character (Unicode code point) is a letter.
5807     * <p>
5808     * A character is considered to be a letter if its general
5809     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5810     * is any of the following:
5811     * <ul>
5812     * <li> {@code UPPERCASE_LETTER}
5813     * <li> {@code LOWERCASE_LETTER}
5814     * <li> {@code TITLECASE_LETTER}
5815     * <li> {@code MODIFIER_LETTER}
5816     * <li> {@code OTHER_LETTER}
5817     * </ul>
5818     *
5819     * Not all letters have case. Many characters are
5820     * letters but are neither uppercase nor lowercase nor titlecase.
5821     *
5822     * @param   codePoint the character (Unicode code point) to be tested.
5823     * @return  {@code true} if the character is a letter;
5824     *          {@code false} otherwise.
5825     * @see     Character#isDigit(int)
5826     * @see     Character#isJavaIdentifierStart(int)
5827     * @see     Character#isLetterOrDigit(int)
5828     * @see     Character#isLowerCase(int)
5829     * @see     Character#isTitleCase(int)
5830     * @see     Character#isUnicodeIdentifierStart(int)
5831     * @see     Character#isUpperCase(int)
5832     * @since   1.5
5833     */
5834    public static boolean isLetter(int codePoint) {
5835        return isLetterImpl(codePoint);
5836    }
5837
5838    @FastNative
5839    static native boolean isLetterImpl(int codePoint);
5840
5841    /**
5842     * Determines if the specified character is a letter or digit.
5843     * <p>
5844     * A character is considered to be a letter or digit if either
5845     * {@code Character.isLetter(char ch)} or
5846     * {@code Character.isDigit(char ch)} returns
5847     * {@code true} for the character.
5848     *
5849     * <p><b>Note:</b> This method cannot handle <a
5850     * href="#supplementary"> supplementary characters</a>. To support
5851     * all Unicode characters, including supplementary characters, use
5852     * the {@link #isLetterOrDigit(int)} method.
5853     *
5854     * @param   ch   the character to be tested.
5855     * @return  {@code true} if the character is a letter or digit;
5856     *          {@code false} otherwise.
5857     * @see     Character#isDigit(char)
5858     * @see     Character#isJavaIdentifierPart(char)
5859     * @see     Character#isJavaLetter(char)
5860     * @see     Character#isJavaLetterOrDigit(char)
5861     * @see     Character#isLetter(char)
5862     * @see     Character#isUnicodeIdentifierPart(char)
5863     * @since   1.0.2
5864     */
5865    public static boolean isLetterOrDigit(char ch) {
5866        return isLetterOrDigit((int)ch);
5867    }
5868
5869    /**
5870     * Determines if the specified character (Unicode code point) is a letter or digit.
5871     * <p>
5872     * A character is considered to be a letter or digit if either
5873     * {@link #isLetter(int) isLetter(codePoint)} or
5874     * {@link #isDigit(int) isDigit(codePoint)} returns
5875     * {@code true} for the character.
5876     *
5877     * @param   codePoint the character (Unicode code point) to be tested.
5878     * @return  {@code true} if the character is a letter or digit;
5879     *          {@code false} otherwise.
5880     * @see     Character#isDigit(int)
5881     * @see     Character#isJavaIdentifierPart(int)
5882     * @see     Character#isLetter(int)
5883     * @see     Character#isUnicodeIdentifierPart(int)
5884     * @since   1.5
5885     */
5886    public static boolean isLetterOrDigit(int codePoint) {
5887        return isLetterOrDigitImpl(codePoint);
5888    }
5889
5890    @FastNative
5891    static native boolean isLetterOrDigitImpl(int codePoint);
5892
5893    /**
5894     * Determines if the specified character is permissible as the first
5895     * character in a Java identifier.
5896     * <p>
5897     * A character may start a Java identifier if and only if
5898     * one of the following is true:
5899     * <ul>
5900     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5901     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5902     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5903     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5904     * </ul>
5905     *
5906     * @param   ch the character to be tested.
5907     * @return  {@code true} if the character may start a Java
5908     *          identifier; {@code false} otherwise.
5909     * @see     Character#isJavaLetterOrDigit(char)
5910     * @see     Character#isJavaIdentifierStart(char)
5911     * @see     Character#isJavaIdentifierPart(char)
5912     * @see     Character#isLetter(char)
5913     * @see     Character#isLetterOrDigit(char)
5914     * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5916     * @deprecated Replaced by isJavaIdentifierStart(char).
5917     */
5918    @Deprecated
5919    public static boolean isJavaLetter(char ch) {
5920        return isJavaIdentifierStart(ch);
5921    }
5922
5923    /**
5924     * Determines if the specified character may be part of a Java
5925     * identifier as other than the first character.
5926     * <p>
5927     * A character may be part of a Java identifier if and only if any
5928     * of the following are true:
5929     * <ul>
5930     * <li>  it is a letter
5931     * <li>  it is a currency symbol (such as {@code '$'})
5932     * <li>  it is a connecting punctuation character (such as {@code '_'})
5933     * <li>  it is a digit
5934     * <li>  it is a numeric letter (such as a Roman numeral character)
5935     * <li>  it is a combining mark
5936     * <li>  it is a non-spacing mark
5937     * <li> {@code isIdentifierIgnorable} returns
5938     * {@code true} for the character.
5939     * </ul>
5940     *
5941     * @param   ch the character to be tested.
5942     * @return  {@code true} if the character may be part of a
5943     *          Java identifier; {@code false} otherwise.
5944     * @see     Character#isJavaLetter(char)
5945     * @see     Character#isJavaIdentifierStart(char)
5946     * @see     Character#isJavaIdentifierPart(char)
5947     * @see     Character#isLetter(char)
5948     * @see     Character#isLetterOrDigit(char)
5949     * @see     Character#isUnicodeIdentifierPart(char)
5950     * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5952     * @deprecated Replaced by isJavaIdentifierPart(char).
5953     */
5954    @Deprecated
5955    public static boolean isJavaLetterOrDigit(char ch) {
5956        return isJavaIdentifierPart(ch);
5957    }
5958
5959    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5961     * <p>
5962     * A character is considered to be alphabetic if its general category type,
5963     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5964     * the following:
5965     * <ul>
5966     * <li> <code>UPPERCASE_LETTER</code>
5967     * <li> <code>LOWERCASE_LETTER</code>
5968     * <li> <code>TITLECASE_LETTER</code>
5969     * <li> <code>MODIFIER_LETTER</code>
5970     * <li> <code>OTHER_LETTER</code>
5971     * <li> <code>LETTER_NUMBER</code>
5972     * </ul>
5973     * or it has contributory property Other_Alphabetic as defined by the
5974     * Unicode Standard.
5975     *
5976     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
5978     *          character, <code>false</code> otherwise.
5979     * @since   1.7
5980     */
5981    public static boolean isAlphabetic(int codePoint) {
5982        return isAlphabeticImpl(codePoint);
5983    }
5984
5985    @FastNative
5986    static native boolean isAlphabeticImpl(int codePoint);
5987
5988
5989    /**
5990     * Determines if the specified character (Unicode code point) is a CJKV
5991     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5992     * the Unicode Standard.
5993     *
5994     * @param   codePoint the character (Unicode code point) to be tested.
5995     * @return  <code>true</code> if the character is a Unicode ideograph
5996     *          character, <code>false</code> otherwise.
5997     * @since   1.7
5998     */
5999    public static boolean isIdeographic(int codePoint) {
6000        return isIdeographicImpl(codePoint);
6001    }
6002    @FastNative
6003    static native boolean isIdeographicImpl(int codePoint);
6004
6005    /**
6006     * Determines if the specified character is
6007     * permissible as the first character in a Java identifier.
6008     * <p>
6009     * A character may start a Java identifier if and only if
6010     * one of the following conditions is true:
6011     * <ul>
6012     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6013     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6014     * <li> {@code ch} is a currency symbol (such as {@code '$'})
6015     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6016     * </ul>
6017     *
6018     * <p><b>Note:</b> This method cannot handle <a
6019     * href="#supplementary"> supplementary characters</a>. To support
6020     * all Unicode characters, including supplementary characters, use
6021     * the {@link #isJavaIdentifierStart(int)} method.
6022     *
6023     * @param   ch the character to be tested.
6024     * @return  {@code true} if the character may start a Java identifier;
6025     *          {@code false} otherwise.
6026     * @see     Character#isJavaIdentifierPart(char)
6027     * @see     Character#isLetter(char)
6028     * @see     Character#isUnicodeIdentifierStart(char)
6029     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6030     * @since   1.1
6031     */
6032    public static boolean isJavaIdentifierStart(char ch) {
6033        return isJavaIdentifierStart((int)ch);
6034    }
6035
6036    /**
6037     * Determines if the character (Unicode code point) is
6038     * permissible as the first character in a Java identifier.
6039     * <p>
6040     * A character may start a Java identifier if and only if
6041     * one of the following conditions is true:
6042     * <ul>
6043     * <li> {@link #isLetter(int) isLetter(codePoint)}
6044     *      returns {@code true}
6045     * <li> {@link #getType(int) getType(codePoint)}
6046     *      returns {@code LETTER_NUMBER}
6047     * <li> the referenced character is a currency symbol (such as {@code '$'})
6048     * <li> the referenced character is a connecting punctuation character
6049     *      (such as {@code '_'}).
6050     * </ul>
6051     *
6052     * @param   codePoint the character (Unicode code point) to be tested.
6053     * @return  {@code true} if the character may start a Java identifier;
6054     *          {@code false} otherwise.
6055     * @see     Character#isJavaIdentifierPart(int)
6056     * @see     Character#isLetter(int)
6057     * @see     Character#isUnicodeIdentifierStart(int)
6058     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6059     * @since   1.5
6060     */
6061    public static boolean isJavaIdentifierStart(int codePoint) {
6062        // Use precomputed bitmasks to optimize the ASCII range.
6063        if (codePoint < 64) {
6064            return (codePoint == '$'); // There's only one character in this range.
6065        } else if (codePoint < 128) {
6066            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6067        }
6068        return ((1 << getType(codePoint))
6069                & ((1 << UPPERCASE_LETTER)
6070                   | (1 << LOWERCASE_LETTER)
6071                   | (1  << TITLECASE_LETTER)
6072                   | (1  << MODIFIER_LETTER)
6073                   | (1  << OTHER_LETTER)
6074                   | (1  << CURRENCY_SYMBOL)
6075                   | (1  << CONNECTOR_PUNCTUATION)
6076                   | (1  << LETTER_NUMBER))) != 0;
6077    }
6078
6079    /**
6080     * Determines if the specified character may be part of a Java
6081     * identifier as other than the first character.
6082     * <p>
6083     * A character may be part of a Java identifier if any of the following
6084     * are true:
6085     * <ul>
6086     * <li>  it is a letter
6087     * <li>  it is a currency symbol (such as {@code '$'})
6088     * <li>  it is a connecting punctuation character (such as {@code '_'})
6089     * <li>  it is a digit
6090     * <li>  it is a numeric letter (such as a Roman numeral character)
6091     * <li>  it is a combining mark
6092     * <li>  it is a non-spacing mark
6093     * <li> {@code isIdentifierIgnorable} returns
6094     * {@code true} for the character
6095     * </ul>
6096     *
6097     * <p><b>Note:</b> This method cannot handle <a
6098     * href="#supplementary"> supplementary characters</a>. To support
6099     * all Unicode characters, including supplementary characters, use
6100     * the {@link #isJavaIdentifierPart(int)} method.
6101     *
6102     * @param   ch      the character to be tested.
6103     * @return {@code true} if the character may be part of a
6104     *          Java identifier; {@code false} otherwise.
6105     * @see     Character#isIdentifierIgnorable(char)
6106     * @see     Character#isJavaIdentifierStart(char)
6107     * @see     Character#isLetterOrDigit(char)
6108     * @see     Character#isUnicodeIdentifierPart(char)
6109     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6110     * @since   1.1
6111     */
6112    public static boolean isJavaIdentifierPart(char ch) {
6113        return isJavaIdentifierPart((int)ch);
6114    }
6115
6116    /**
6117     * Determines if the character (Unicode code point) may be part of a Java
6118     * identifier as other than the first character.
6119     * <p>
6120     * A character may be part of a Java identifier if any of the following
6121     * are true:
6122     * <ul>
6123     * <li>  it is a letter
6124     * <li>  it is a currency symbol (such as {@code '$'})
6125     * <li>  it is a connecting punctuation character (such as {@code '_'})
6126     * <li>  it is a digit
6127     * <li>  it is a numeric letter (such as a Roman numeral character)
6128     * <li>  it is a combining mark
6129     * <li>  it is a non-spacing mark
6130     * <li> {@link #isIdentifierIgnorable(int)
6131     * isIdentifierIgnorable(codePoint)} returns {@code true} for
6132     * the character
6133     * </ul>
6134     *
6135     * @param   codePoint the character (Unicode code point) to be tested.
6136     * @return {@code true} if the character may be part of a
6137     *          Java identifier; {@code false} otherwise.
6138     * @see     Character#isIdentifierIgnorable(int)
6139     * @see     Character#isJavaIdentifierStart(int)
6140     * @see     Character#isLetterOrDigit(int)
6141     * @see     Character#isUnicodeIdentifierPart(int)
6142     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6143     * @since   1.5
6144     */
6145    public static boolean isJavaIdentifierPart(int codePoint) {
6146        // Use precomputed bitmasks to optimize the ASCII range.
6147        if (codePoint < 64) {
6148            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
6149        } else if (codePoint < 128) {
6150            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6151        }
6152        return ((1 << getType(codePoint))
6153                & ((1 << UPPERCASE_LETTER)
6154                   | (1 << LOWERCASE_LETTER)
6155                   | (1 << TITLECASE_LETTER)
6156                   | (1 << MODIFIER_LETTER)
6157                   | (1 << OTHER_LETTER)
6158                   | (1 << CURRENCY_SYMBOL)
6159                   | (1 << CONNECTOR_PUNCTUATION)
6160                   | (1 << DECIMAL_DIGIT_NUMBER)
6161                   | (1 << LETTER_NUMBER)
6162                   | (1 << FORMAT)
6163                   | (1 << COMBINING_SPACING_MARK)
6164                   | (1 << NON_SPACING_MARK))) != 0
6165                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
6166                || (codePoint >= 0x7f && codePoint <= 0x9f);
6167    }
6168
6169    /**
6170     * Determines if the specified character is permissible as the
6171     * first character in a Unicode identifier.
6172     * <p>
6173     * A character may start a Unicode identifier if and only if
6174     * one of the following conditions is true:
6175     * <ul>
6176     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6177     * <li> {@link #getType(char) getType(ch)} returns
6178     *      {@code LETTER_NUMBER}.
6179     * </ul>
6180     *
6181     * <p><b>Note:</b> This method cannot handle <a
6182     * href="#supplementary"> supplementary characters</a>. To support
6183     * all Unicode characters, including supplementary characters, use
6184     * the {@link #isUnicodeIdentifierStart(int)} method.
6185     *
6186     * @param   ch      the character to be tested.
6187     * @return  {@code true} if the character may start a Unicode
6188     *          identifier; {@code false} otherwise.
6189     * @see     Character#isJavaIdentifierStart(char)
6190     * @see     Character#isLetter(char)
6191     * @see     Character#isUnicodeIdentifierPart(char)
6192     * @since   1.1
6193     */
6194    public static boolean isUnicodeIdentifierStart(char ch) {
6195        return isUnicodeIdentifierStart((int)ch);
6196    }
6197
6198    /**
6199     * Determines if the specified character (Unicode code point) is permissible as the
6200     * first character in a Unicode identifier.
6201     * <p>
6202     * A character may start a Unicode identifier if and only if
6203     * one of the following conditions is true:
6204     * <ul>
6205     * <li> {@link #isLetter(int) isLetter(codePoint)}
6206     *      returns {@code true}
6207     * <li> {@link #getType(int) getType(codePoint)}
6208     *      returns {@code LETTER_NUMBER}.
6209     * </ul>
6210     * @param   codePoint the character (Unicode code point) to be tested.
6211     * @return  {@code true} if the character may start a Unicode
6212     *          identifier; {@code false} otherwise.
6213     * @see     Character#isJavaIdentifierStart(int)
6214     * @see     Character#isLetter(int)
6215     * @see     Character#isUnicodeIdentifierPart(int)
6216     * @since   1.5
6217     */
6218    public static boolean isUnicodeIdentifierStart(int codePoint) {
6219        return isUnicodeIdentifierStartImpl(codePoint);
6220    }
6221
6222    @FastNative
6223    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6224
6225    /**
6226     * Determines if the specified character may be part of a Unicode
6227     * identifier as other than the first character.
6228     * <p>
6229     * A character may be part of a Unicode identifier if and only if
6230     * one of the following statements is true:
6231     * <ul>
6232     * <li>  it is a letter
6233     * <li>  it is a connecting punctuation character (such as {@code '_'})
6234     * <li>  it is a digit
6235     * <li>  it is a numeric letter (such as a Roman numeral character)
6236     * <li>  it is a combining mark
6237     * <li>  it is a non-spacing mark
6238     * <li> {@code isIdentifierIgnorable} returns
6239     * {@code true} for this character.
6240     * </ul>
6241     *
6242     * <p><b>Note:</b> This method cannot handle <a
6243     * href="#supplementary"> supplementary characters</a>. To support
6244     * all Unicode characters, including supplementary characters, use
6245     * the {@link #isUnicodeIdentifierPart(int)} method.
6246     *
6247     * @param   ch      the character to be tested.
6248     * @return  {@code true} if the character may be part of a
6249     *          Unicode identifier; {@code false} otherwise.
6250     * @see     Character#isIdentifierIgnorable(char)
6251     * @see     Character#isJavaIdentifierPart(char)
6252     * @see     Character#isLetterOrDigit(char)
6253     * @see     Character#isUnicodeIdentifierStart(char)
6254     * @since   1.1
6255     */
6256    public static boolean isUnicodeIdentifierPart(char ch) {
6257        return isUnicodeIdentifierPart((int)ch);
6258    }
6259
6260    /**
6261     * Determines if the specified character (Unicode code point) may be part of a Unicode
6262     * identifier as other than the first character.
6263     * <p>
6264     * A character may be part of a Unicode identifier if and only if
6265     * one of the following statements is true:
6266     * <ul>
6267     * <li>  it is a letter
6268     * <li>  it is a connecting punctuation character (such as {@code '_'})
6269     * <li>  it is a digit
6270     * <li>  it is a numeric letter (such as a Roman numeral character)
6271     * <li>  it is a combining mark
6272     * <li>  it is a non-spacing mark
6273     * <li> {@code isIdentifierIgnorable} returns
6274     * {@code true} for this character.
6275     * </ul>
6276     * @param   codePoint the character (Unicode code point) to be tested.
6277     * @return  {@code true} if the character may be part of a
6278     *          Unicode identifier; {@code false} otherwise.
6279     * @see     Character#isIdentifierIgnorable(int)
6280     * @see     Character#isJavaIdentifierPart(int)
6281     * @see     Character#isLetterOrDigit(int)
6282     * @see     Character#isUnicodeIdentifierStart(int)
6283     * @since   1.5
6284     */
6285    public static boolean isUnicodeIdentifierPart(int codePoint) {
6286        return isUnicodeIdentifierPartImpl(codePoint);
6287    }
6288
6289    @FastNative
6290    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6291
6292    /**
6293     * Determines if the specified character should be regarded as
6294     * an ignorable character in a Java identifier or a Unicode identifier.
6295     * <p>
6296     * The following Unicode characters are ignorable in a Java identifier
6297     * or a Unicode identifier:
6298     * <ul>
6299     * <li>ISO control characters that are not whitespace
6300     * <ul>
6301     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6302     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6303     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6304     * </ul>
6305     *
6306     * <li>all characters that have the {@code FORMAT} general
6307     * category value
6308     * </ul>
6309     *
6310     * <p><b>Note:</b> This method cannot handle <a
6311     * href="#supplementary"> supplementary characters</a>. To support
6312     * all Unicode characters, including supplementary characters, use
6313     * the {@link #isIdentifierIgnorable(int)} method.
6314     *
6315     * @param   ch      the character to be tested.
6316     * @return  {@code true} if the character is an ignorable control
6317     *          character that may be part of a Java or Unicode identifier;
6318     *           {@code false} otherwise.
6319     * @see     Character#isJavaIdentifierPart(char)
6320     * @see     Character#isUnicodeIdentifierPart(char)
6321     * @since   1.1
6322     */
6323    public static boolean isIdentifierIgnorable(char ch) {
6324        return isIdentifierIgnorable((int)ch);
6325    }
6326
6327    /**
6328     * Determines if the specified character (Unicode code point) should be regarded as
6329     * an ignorable character in a Java identifier or a Unicode identifier.
6330     * <p>
6331     * The following Unicode characters are ignorable in a Java identifier
6332     * or a Unicode identifier:
6333     * <ul>
6334     * <li>ISO control characters that are not whitespace
6335     * <ul>
6336     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6337     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6338     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6339     * </ul>
6340     *
6341     * <li>all characters that have the {@code FORMAT} general
6342     * category value
6343     * </ul>
6344     *
6345     * @param   codePoint the character (Unicode code point) to be tested.
6346     * @return  {@code true} if the character is an ignorable control
6347     *          character that may be part of a Java or Unicode identifier;
6348     *          {@code false} otherwise.
6349     * @see     Character#isJavaIdentifierPart(int)
6350     * @see     Character#isUnicodeIdentifierPart(int)
6351     * @since   1.5
6352     */
6353    public static boolean isIdentifierIgnorable(int codePoint) {
6354        return isIdentifierIgnorableImpl(codePoint);
6355    }
6356
6357    @FastNative
6358    static native boolean isIdentifierIgnorableImpl(int codePoint);
6359
6360    /**
6361     * Converts the character argument to lowercase using case
6362     * mapping information from the UnicodeData file.
6363     * <p>
6364     * Note that
6365     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6366     * does not always return {@code true} for some ranges of
6367     * characters, particularly those that are symbols or ideographs.
6368     *
6369     * <p>In general, {@link String#toLowerCase()} should be used to map
6370     * characters to lowercase. {@code String} case mapping methods
6371     * have several benefits over {@code Character} case mapping methods.
6372     * {@code String} case mapping methods can perform locale-sensitive
6373     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6374     * the {@code Character} case mapping methods cannot.
6375     *
6376     * <p><b>Note:</b> This method cannot handle <a
6377     * href="#supplementary"> supplementary characters</a>. To support
6378     * all Unicode characters, including supplementary characters, use
6379     * the {@link #toLowerCase(int)} method.
6380     *
6381     * @param   ch   the character to be converted.
6382     * @return  the lowercase equivalent of the character, if any;
6383     *          otherwise, the character itself.
6384     * @see     Character#isLowerCase(char)
6385     * @see     String#toLowerCase()
6386     */
6387    public static char toLowerCase(char ch) {
6388        return (char)toLowerCase((int)ch);
6389    }
6390
6391    /**
6392     * Converts the character (Unicode code point) argument to
6393     * lowercase using case mapping information from the UnicodeData
6394     * file.
6395     *
6396     * <p> Note that
6397     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6398     * does not always return {@code true} for some ranges of
6399     * characters, particularly those that are symbols or ideographs.
6400     *
6401     * <p>In general, {@link String#toLowerCase()} should be used to map
6402     * characters to lowercase. {@code String} case mapping methods
6403     * have several benefits over {@code Character} case mapping methods.
6404     * {@code String} case mapping methods can perform locale-sensitive
6405     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6406     * the {@code Character} case mapping methods cannot.
6407     *
6408     * @param   codePoint   the character (Unicode code point) to be converted.
6409     * @return  the lowercase equivalent of the character (Unicode code
6410     *          point), if any; otherwise, the character itself.
6411     * @see     Character#isLowerCase(int)
6412     * @see     String#toLowerCase()
6413     *
6414     * @since   1.5
6415     */
6416    public static int toLowerCase(int codePoint) {
6417        if (codePoint >= 'A' && codePoint <= 'Z') {
6418            return codePoint + ('a' - 'A');
6419        }
6420
6421        // All ASCII codepoints except the ones above remain unchanged.
6422        if (codePoint < 0x80) {
6423            return codePoint;
6424        }
6425
6426        return toLowerCaseImpl(codePoint);
6427    }
6428
6429    @FastNative
6430    static native int toLowerCaseImpl(int codePoint);
6431
6432    /**
6433     * Converts the character argument to uppercase using case mapping
6434     * information from the UnicodeData file.
6435     * <p>
6436     * Note that
6437     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6438     * does not always return {@code true} for some ranges of
6439     * characters, particularly those that are symbols or ideographs.
6440     *
6441     * <p>In general, {@link String#toUpperCase()} should be used to map
6442     * characters to uppercase. {@code String} case mapping methods
6443     * have several benefits over {@code Character} case mapping methods.
6444     * {@code String} case mapping methods can perform locale-sensitive
6445     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6446     * the {@code Character} case mapping methods cannot.
6447     *
6448     * <p><b>Note:</b> This method cannot handle <a
6449     * href="#supplementary"> supplementary characters</a>. To support
6450     * all Unicode characters, including supplementary characters, use
6451     * the {@link #toUpperCase(int)} method.
6452     *
6453     * @param   ch   the character to be converted.
6454     * @return  the uppercase equivalent of the character, if any;
6455     *          otherwise, the character itself.
6456     * @see     Character#isUpperCase(char)
6457     * @see     String#toUpperCase()
6458     */
6459    public static char toUpperCase(char ch) {
6460        return (char)toUpperCase((int)ch);
6461    }
6462
6463    /**
6464     * Converts the character (Unicode code point) argument to
6465     * uppercase using case mapping information from the UnicodeData
6466     * file.
6467     *
6468     * <p>Note that
6469     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6470     * does not always return {@code true} for some ranges of
6471     * characters, particularly those that are symbols or ideographs.
6472     *
6473     * <p>In general, {@link String#toUpperCase()} should be used to map
6474     * characters to uppercase. {@code String} case mapping methods
6475     * have several benefits over {@code Character} case mapping methods.
6476     * {@code String} case mapping methods can perform locale-sensitive
6477     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6478     * the {@code Character} case mapping methods cannot.
6479     *
6480     * @param   codePoint   the character (Unicode code point) to be converted.
6481     * @return  the uppercase equivalent of the character, if any;
6482     *          otherwise, the character itself.
6483     * @see     Character#isUpperCase(int)
6484     * @see     String#toUpperCase()
6485     *
6486     * @since   1.5
6487     */
6488    public static int toUpperCase(int codePoint) {
6489        if (codePoint >= 'a' && codePoint <= 'z') {
6490            return codePoint - ('a' - 'A');
6491        }
6492
6493        // All ASCII codepoints except the ones above remain unchanged.
6494        if (codePoint < 0x80) {
6495            return codePoint;
6496        }
6497
6498        return toUpperCaseImpl(codePoint);
6499    }
6500
    // Native counterpart of toUpperCase(int). That wrapper handles every value
    // below 0x80 itself and only delegates here for code points >= 0x80.
    @FastNative
    static native int toUpperCaseImpl(int codePoint);
6503
6504    /**
6505     * Converts the character argument to titlecase using case mapping
6506     * information from the UnicodeData file. If a character has no
6507     * explicit titlecase mapping and is not itself a titlecase char
6508     * according to UnicodeData, then the uppercase mapping is
6509     * returned as an equivalent titlecase mapping. If the
6510     * {@code char} argument is already a titlecase
6511     * {@code char}, the same {@code char} value will be
6512     * returned.
6513     * <p>
6514     * Note that
6515     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6516     * does not always return {@code true} for some ranges of
6517     * characters.
6518     *
6519     * <p><b>Note:</b> This method cannot handle <a
6520     * href="#supplementary"> supplementary characters</a>. To support
6521     * all Unicode characters, including supplementary characters, use
6522     * the {@link #toTitleCase(int)} method.
6523     *
6524     * @param   ch   the character to be converted.
6525     * @return  the titlecase equivalent of the character, if any;
6526     *          otherwise, the character itself.
6527     * @see     Character#isTitleCase(char)
6528     * @see     Character#toLowerCase(char)
6529     * @see     Character#toUpperCase(char)
6530     * @since   1.0.2
6531     */
6532    public static char toTitleCase(char ch) {
6533        return (char)toTitleCase((int)ch);
6534    }
6535
6536    /**
6537     * Converts the character (Unicode code point) argument to titlecase using case mapping
6538     * information from the UnicodeData file. If a character has no
6539     * explicit titlecase mapping and is not itself a titlecase char
6540     * according to UnicodeData, then the uppercase mapping is
6541     * returned as an equivalent titlecase mapping. If the
6542     * character argument is already a titlecase
6543     * character, the same character value will be
6544     * returned.
6545     *
6546     * <p>Note that
6547     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6548     * does not always return {@code true} for some ranges of
6549     * characters.
6550     *
6551     * @param   codePoint   the character (Unicode code point) to be converted.
6552     * @return  the titlecase equivalent of the character, if any;
6553     *          otherwise, the character itself.
6554     * @see     Character#isTitleCase(int)
6555     * @see     Character#toLowerCase(int)
6556     * @see     Character#toUpperCase(int)
6557     * @since   1.5
6558     */
6559    public static int toTitleCase(int codePoint) {
6560        return toTitleCaseImpl(codePoint);
6561    }
6562
    // Native counterpart of toTitleCase(int), which forwards every code point
    // here without any Java-side filtering.
    @FastNative
    static native int toTitleCaseImpl(int codePoint);
6565
6566    /**
6567     * Returns the numeric value of the character {@code ch} in the
6568     * specified radix.
6569     * <p>
6570     * If the radix is not in the range {@code MIN_RADIX} &le;
6571     * {@code radix} &le; {@code MAX_RADIX} or if the
6572     * value of {@code ch} is not a valid digit in the specified
6573     * radix, {@code -1} is returned. A character is a valid digit
6574     * if at least one of the following is true:
6575     * <ul>
6576     * <li>The method {@code isDigit} is {@code true} of the character
6577     *     and the Unicode decimal digit value of the character (or its
6578     *     single-character decomposition) is less than the specified radix.
6579     *     In this case the decimal digit value is returned.
6580     * <li>The character is one of the uppercase Latin letters
6581     *     {@code 'A'} through {@code 'Z'} and its code is less than
6582     *     {@code radix + 'A' - 10}.
6583     *     In this case, {@code ch - 'A' + 10}
6584     *     is returned.
6585     * <li>The character is one of the lowercase Latin letters
6586     *     {@code 'a'} through {@code 'z'} and its code is less than
6587     *     {@code radix + 'a' - 10}.
6588     *     In this case, {@code ch - 'a' + 10}
6589     *     is returned.
6590     * <li>The character is one of the fullwidth uppercase Latin letters A
6591     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6592     *     and its code is less than
6593     *     {@code radix + '\u005CuFF21' - 10}.
6594     *     In this case, {@code ch - '\u005CuFF21' + 10}
6595     *     is returned.
6596     * <li>The character is one of the fullwidth lowercase Latin letters a
6597     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6598     *     and its code is less than
6599     *     {@code radix + '\u005CuFF41' - 10}.
6600     *     In this case, {@code ch - '\u005CuFF41' + 10}
6601     *     is returned.
6602     * </ul>
6603     *
6604     * <p><b>Note:</b> This method cannot handle <a
6605     * href="#supplementary"> supplementary characters</a>. To support
6606     * all Unicode characters, including supplementary characters, use
6607     * the {@link #digit(int, int)} method.
6608     *
6609     * @param   ch      the character to be converted.
6610     * @param   radix   the radix.
6611     * @return  the numeric value represented by the character in the
6612     *          specified radix.
6613     * @see     Character#forDigit(int, int)
6614     * @see     Character#isDigit(char)
6615     */
6616    public static int digit(char ch, int radix) {
6617        return digit((int)ch, radix);
6618    }
6619
6620    /**
6621     * Returns the numeric value of the specified character (Unicode
6622     * code point) in the specified radix.
6623     *
6624     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6625     * {@code radix} &le; {@code MAX_RADIX} or if the
6626     * character is not a valid digit in the specified
6627     * radix, {@code -1} is returned. A character is a valid digit
6628     * if at least one of the following is true:
6629     * <ul>
6630     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6631     *     and the Unicode decimal digit value of the character (or its
6632     *     single-character decomposition) is less than the specified radix.
6633     *     In this case the decimal digit value is returned.
6634     * <li>The character is one of the uppercase Latin letters
6635     *     {@code 'A'} through {@code 'Z'} and its code is less than
6636     *     {@code radix + 'A' - 10}.
6637     *     In this case, {@code codePoint - 'A' + 10}
6638     *     is returned.
6639     * <li>The character is one of the lowercase Latin letters
6640     *     {@code 'a'} through {@code 'z'} and its code is less than
6641     *     {@code radix + 'a' - 10}.
6642     *     In this case, {@code codePoint - 'a' + 10}
6643     *     is returned.
6644     * <li>The character is one of the fullwidth uppercase Latin letters A
6645     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6646     *     and its code is less than
6647     *     {@code radix + '\u005CuFF21' - 10}.
6648     *     In this case,
6649     *     {@code codePoint - '\u005CuFF21' + 10}
6650     *     is returned.
6651     * <li>The character is one of the fullwidth lowercase Latin letters a
6652     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6653     *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6655     *     In this case,
6656     *     {@code codePoint - '\u005CuFF41' + 10}
6657     *     is returned.
6658     * </ul>
6659     *
6660     * @param   codePoint the character (Unicode code point) to be converted.
6661     * @param   radix   the radix.
6662     * @return  the numeric value represented by the character in the
6663     *          specified radix.
6664     * @see     Character#forDigit(int, int)
6665     * @see     Character#isDigit(int)
6666     * @since   1.5
6667     */
6668    public static int digit(int codePoint, int radix) {
6669        if (radix < MIN_RADIX || radix > MAX_RADIX) {
6670            return -1;
6671        }
6672        if (codePoint < 128) {
6673            // Optimized for ASCII
6674            int result = -1;
6675            if ('0' <= codePoint && codePoint <= '9') {
6676                result = codePoint - '0';
6677            } else if ('a' <= codePoint && codePoint <= 'z') {
6678                result = 10 + (codePoint - 'a');
6679            } else if ('A' <= codePoint && codePoint <= 'Z') {
6680                result = 10 + (codePoint - 'A');
6681            }
6682            return result < radix ? result : -1;
6683        }
6684        return digitImpl(codePoint, radix);
6685    }
6686
6687    @FastNative
6688    native static int digitImpl(int codePoint, int radix);
6689
6690    /**
6691     * Returns the {@code int} value that the specified Unicode
6692     * character represents. For example, the character
6693     * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6694     * an int with a value of 50.
6695     * <p>
6696     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6697     * {@code '\u005Cu005A'}), lowercase
6698     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6699     * full width variant ({@code '\u005CuFF21'} through
6700     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6701     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6702     * through 35. This is independent of the Unicode specification,
6703     * which does not assign numeric values to these {@code char}
6704     * values.
6705     * <p>
6706     * If the character does not have a numeric value, then -1 is returned.
6707     * If the character has a numeric value that cannot be represented as a
6708     * nonnegative integer (for example, a fractional value), then -2
6709     * is returned.
6710     *
6711     * <p><b>Note:</b> This method cannot handle <a
6712     * href="#supplementary"> supplementary characters</a>. To support
6713     * all Unicode characters, including supplementary characters, use
6714     * the {@link #getNumericValue(int)} method.
6715     *
6716     * @param   ch      the character to be converted.
6717     * @return  the numeric value of the character, as a nonnegative {@code int}
6718     *           value; -2 if the character has a numeric value that is not a
6719     *          nonnegative integer; -1 if the character has no numeric value.
6720     * @see     Character#forDigit(int, int)
6721     * @see     Character#isDigit(char)
6722     * @since   1.1
6723     */
6724    public static int getNumericValue(char ch) {
6725        return getNumericValue((int)ch);
6726    }
6727
6728    /**
6729     * Returns the {@code int} value that the specified
6730     * character (Unicode code point) represents. For example, the character
6731     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6732     * an {@code int} with a value of 50.
6733     * <p>
6734     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6735     * {@code '\u005Cu005A'}), lowercase
6736     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6737     * full width variant ({@code '\u005CuFF21'} through
6738     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6739     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6740     * through 35. This is independent of the Unicode specification,
6741     * which does not assign numeric values to these {@code char}
6742     * values.
6743     * <p>
6744     * If the character does not have a numeric value, then -1 is returned.
6745     * If the character has a numeric value that cannot be represented as a
6746     * nonnegative integer (for example, a fractional value), then -2
6747     * is returned.
6748     *
6749     * @param   codePoint the character (Unicode code point) to be converted.
6750     * @return  the numeric value of the character, as a nonnegative {@code int}
6751     *          value; -2 if the character has a numeric value that is not a
6752     *          nonnegative integer; -1 if the character has no numeric value.
6753     * @see     Character#forDigit(int, int)
6754     * @see     Character#isDigit(int)
6755     * @since   1.5
6756     */
6757    public static int getNumericValue(int codePoint) {
6758        // This is both an optimization and papers over differences between Java and ICU.
6759        if (codePoint < 128) {
6760            if (codePoint >= '0' && codePoint <= '9') {
6761                return codePoint - '0';
6762            }
6763            if (codePoint >= 'a' && codePoint <= 'z') {
6764                return codePoint - ('a' - 10);
6765            }
6766            if (codePoint >= 'A' && codePoint <= 'Z') {
6767                return codePoint - ('A' - 10);
6768            }
6769            return -1;
6770        }
6771        // Full-width uppercase A-Z.
6772        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6773            return codePoint - 0xff17;
6774        }
6775        // Full-width lowercase a-z.
6776        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6777            return codePoint - 0xff37;
6778        }
6779        return getNumericValueImpl(codePoint);
6780    }
6781
6782    @FastNative
6783    native static int getNumericValueImpl(int codePoint);
6784
6785    /**
6786     * Determines if the specified character is ISO-LATIN-1 white space.
6787     * This method returns {@code true} for the following five
6788     * characters only:
6789     * <table summary="truechars">
6790     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6791     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6792     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6793     *     <td>{@code NEW LINE}</td></tr>
6794     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6795     *     <td>{@code FORM FEED}</td></tr>
6796     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6797     *     <td>{@code CARRIAGE RETURN}</td></tr>
6798     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6799     *     <td>{@code SPACE}</td></tr>
6800     * </table>
6801     *
6802     * @param      ch   the character to be tested.
6803     * @return     {@code true} if the character is ISO-LATIN-1 white
6804     *             space; {@code false} otherwise.
6805     * @see        Character#isSpaceChar(char)
6806     * @see        Character#isWhitespace(char)
6807     * @deprecated Replaced by isWhitespace(char).
6808     */
6809    @Deprecated
6810    public static boolean isSpace(char ch) {
6811        return (ch <= 0x0020) &&
6812            (((((1L << 0x0009) |
6813            (1L << 0x000A) |
6814            (1L << 0x000C) |
6815            (1L << 0x000D) |
6816            (1L << 0x0020)) >> ch) & 1L) != 0);
6817    }
6818
6819
6820    /**
6821     * Determines if the specified character is a Unicode space character.
6822     * A character is considered to be a space character if and only if
6823     * it is specified to be a space character by the Unicode Standard. This
6824     * method returns true if the character's general category type is any of
6825     * the following:
6826     * <ul>
6827     * <li> {@code SPACE_SEPARATOR}
6828     * <li> {@code LINE_SEPARATOR}
6829     * <li> {@code PARAGRAPH_SEPARATOR}
6830     * </ul>
6831     *
6832     * <p><b>Note:</b> This method cannot handle <a
6833     * href="#supplementary"> supplementary characters</a>. To support
6834     * all Unicode characters, including supplementary characters, use
6835     * the {@link #isSpaceChar(int)} method.
6836     *
6837     * @param   ch      the character to be tested.
6838     * @return  {@code true} if the character is a space character;
6839     *          {@code false} otherwise.
6840     * @see     Character#isWhitespace(char)
6841     * @since   1.1
6842     */
6843    public static boolean isSpaceChar(char ch) {
6844        return isSpaceChar((int)ch);
6845    }
6846
6847    /**
6848     * Determines if the specified character (Unicode code point) is a
6849     * Unicode space character.  A character is considered to be a
6850     * space character if and only if it is specified to be a space
6851     * character by the Unicode Standard. This method returns true if
6852     * the character's general category type is any of the following:
6853     *
6854     * <ul>
6855     * <li> {@link #SPACE_SEPARATOR}
6856     * <li> {@link #LINE_SEPARATOR}
6857     * <li> {@link #PARAGRAPH_SEPARATOR}
6858     * </ul>
6859     *
6860     * @param   codePoint the character (Unicode code point) to be tested.
6861     * @return  {@code true} if the character is a space character;
6862     *          {@code false} otherwise.
6863     * @see     Character#isWhitespace(int)
6864     * @since   1.5
6865     */
6866    public static boolean isSpaceChar(int codePoint) {
6867        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6868        // SPACE or NO-BREAK SPACE?
6869        if (codePoint == 0x20 || codePoint == 0xa0) {
6870            return true;
6871        }
6872        if (codePoint < 0x1000) {
6873            return false;
6874        }
6875        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6876        if (codePoint == 0x1680 || codePoint == 0x180e) {
6877            return true;
6878        }
6879        if (codePoint < 0x2000) {
6880            return false;
6881        }
6882        if (codePoint <= 0xffff) {
6883            // Other whitespace from General Punctuation...
6884            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6885                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6886        }
6887        // Let icu4c worry about non-BMP code points.
6888        return isSpaceCharImpl(codePoint);
6889    }
6890
    // Native counterpart of isSpaceChar(int). That wrapper decides every code
    // point up to 0xffff itself and only delegates non-BMP code points here.
    @FastNative
    static native boolean isSpaceCharImpl(int codePoint);
6893
6894    /**
6895     * Determines if the specified character is white space according to Java.
6896     * A character is a Java whitespace character if and only if it satisfies
6897     * one of the following criteria:
6898     * <ul>
6899     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6900     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6901     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6902     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6903     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6904     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6905     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6906     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6907     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6908     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6909     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6910     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6911     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6912     * </ul>
6913     *
6914     * <p><b>Note:</b> This method cannot handle <a
6915     * href="#supplementary"> supplementary characters</a>. To support
6916     * all Unicode characters, including supplementary characters, use
6917     * the {@link #isWhitespace(int)} method.
6918     *
6919     * @param   ch the character to be tested.
6920     * @return  {@code true} if the character is a Java whitespace
6921     *          character; {@code false} otherwise.
6922     * @see     Character#isSpaceChar(char)
6923     * @since   1.1
6924     */
6925    public static boolean isWhitespace(char ch) {
6926        return isWhitespace((int)ch);
6927    }
6928
6929    /**
6930     * Determines if the specified character (Unicode code point) is
6931     * white space according to Java.  A character is a Java
6932     * whitespace character if and only if it satisfies one of the
6933     * following criteria:
6934     * <ul>
6935     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6936     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6937     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6938     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6939     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6940     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6941     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6942     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6943     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6944     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6945     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6946     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6947     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6948     * </ul>
     *
6951     * @param   codePoint the character (Unicode code point) to be tested.
6952     * @return  {@code true} if the character is a Java whitespace
6953     *          character; {@code false} otherwise.
6954     * @see     Character#isSpaceChar(int)
6955     * @since   1.5
6956     */
6957    public static boolean isWhitespace(int codePoint) {
6958        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6959        // Any ASCII whitespace character?
6960        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6961            return true;
6962        }
6963        if (codePoint < 0x1000) {
6964            return false;
6965        }
6966        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6967        if (codePoint == 0x1680 || codePoint == 0x180e) {
6968            return true;
6969        }
6970        if (codePoint < 0x2000) {
6971            return false;
6972        }
6973        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6974        if (codePoint == 0x2007 || codePoint == 0x202f) {
6975            return false;
6976        }
6977        if (codePoint <= 0xffff) {
6978            // Other whitespace from General Punctuation...
6979            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6980                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6981        }
6982        // Let icu4c worry about non-BMP code points.
6983        return isWhitespaceImpl(codePoint);
6984    }
6985
6986    @FastNative
6987    native static boolean isWhitespaceImpl(int codePoint);
6988
6989    /**
6990     * Determines if the specified character is an ISO control
6991     * character.  A character is considered to be an ISO control
6992     * character if its code is in the range {@code '\u005Cu0000'}
6993     * through {@code '\u005Cu001F'} or in the range
6994     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6995     *
6996     * <p><b>Note:</b> This method cannot handle <a
6997     * href="#supplementary"> supplementary characters</a>. To support
6998     * all Unicode characters, including supplementary characters, use
6999     * the {@link #isISOControl(int)} method.
7000     *
7001     * @param   ch      the character to be tested.
7002     * @return  {@code true} if the character is an ISO control character;
7003     *          {@code false} otherwise.
7004     *
7005     * @see     Character#isSpaceChar(char)
7006     * @see     Character#isWhitespace(char)
7007     * @since   1.1
7008     */
7009    public static boolean isISOControl(char ch) {
7010        return isISOControl((int)ch);
7011    }
7012
7013    /**
7014     * Determines if the referenced character (Unicode code point) is an ISO control
7015     * character.  A character is considered to be an ISO control
7016     * character if its code is in the range {@code '\u005Cu0000'}
7017     * through {@code '\u005Cu001F'} or in the range
7018     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
7019     *
7020     * @param   codePoint the character (Unicode code point) to be tested.
7021     * @return  {@code true} if the character is an ISO control character;
7022     *          {@code false} otherwise.
7023     * @see     Character#isSpaceChar(int)
7024     * @see     Character#isWhitespace(int)
7025     * @since   1.5
7026     */
7027    public static boolean isISOControl(int codePoint) {
7028        // Optimized form of:
7029        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
7030        //     (codePoint >= 0x7F && codePoint <= 0x9F);
7031        return codePoint <= 0x9F &&
7032            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
7033    }
7034
7035    /**
7036     * Returns a value indicating a character's general category.
7037     *
7038     * <p><b>Note:</b> This method cannot handle <a
7039     * href="#supplementary"> supplementary characters</a>. To support
7040     * all Unicode characters, including supplementary characters, use
7041     * the {@link #getType(int)} method.
7042     *
7043     * @param   ch      the character to be tested.
7044     * @return  a value of type {@code int} representing the
7045     *          character's general category.
7046     * @see     Character#COMBINING_SPACING_MARK
7047     * @see     Character#CONNECTOR_PUNCTUATION
7048     * @see     Character#CONTROL
7049     * @see     Character#CURRENCY_SYMBOL
7050     * @see     Character#DASH_PUNCTUATION
7051     * @see     Character#DECIMAL_DIGIT_NUMBER
7052     * @see     Character#ENCLOSING_MARK
7053     * @see     Character#END_PUNCTUATION
7054     * @see     Character#FINAL_QUOTE_PUNCTUATION
7055     * @see     Character#FORMAT
7056     * @see     Character#INITIAL_QUOTE_PUNCTUATION
7057     * @see     Character#LETTER_NUMBER
7058     * @see     Character#LINE_SEPARATOR
7059     * @see     Character#LOWERCASE_LETTER
7060     * @see     Character#MATH_SYMBOL
7061     * @see     Character#MODIFIER_LETTER
7062     * @see     Character#MODIFIER_SYMBOL
7063     * @see     Character#NON_SPACING_MARK
7064     * @see     Character#OTHER_LETTER
7065     * @see     Character#OTHER_NUMBER
7066     * @see     Character#OTHER_PUNCTUATION
7067     * @see     Character#OTHER_SYMBOL
7068     * @see     Character#PARAGRAPH_SEPARATOR
7069     * @see     Character#PRIVATE_USE
7070     * @see     Character#SPACE_SEPARATOR
7071     * @see     Character#START_PUNCTUATION
7072     * @see     Character#SURROGATE
7073     * @see     Character#TITLECASE_LETTER
7074     * @see     Character#UNASSIGNED
7075     * @see     Character#UPPERCASE_LETTER
7076     * @since   1.1
7077     */
7078    public static int getType(char ch) {
7079        return getType((int)ch);
7080    }
7081
7082    /**
7083     * Returns a value indicating a character's general category.
7084     *
7085     * @param   codePoint the character (Unicode code point) to be tested.
7086     * @return  a value of type {@code int} representing the
7087     *          character's general category.
7088     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
7089     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
7090     * @see     Character#CONTROL CONTROL
7091     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
7092     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
7093     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
7094     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
7095     * @see     Character#END_PUNCTUATION END_PUNCTUATION
7096     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
7097     * @see     Character#FORMAT FORMAT
7098     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
7099     * @see     Character#LETTER_NUMBER LETTER_NUMBER
7100     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
7101     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
7102     * @see     Character#MATH_SYMBOL MATH_SYMBOL
7103     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
7104     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
7105     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
7106     * @see     Character#OTHER_LETTER OTHER_LETTER
7107     * @see     Character#OTHER_NUMBER OTHER_NUMBER
7108     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
7109     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
7110     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
7111     * @see     Character#PRIVATE_USE PRIVATE_USE
7112     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
7113     * @see     Character#START_PUNCTUATION START_PUNCTUATION
7114     * @see     Character#SURROGATE SURROGATE
7115     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
7116     * @see     Character#UNASSIGNED UNASSIGNED
7117     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
7118     * @since   1.5
7119     */
7120    public static int getType(int codePoint) {
7121        int type = getTypeImpl(codePoint);
7122        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
7123        if (type <= Character.FORMAT) {
7124            return type;
7125        }
7126        return (type + 1);
7127    }
7128
    // Native counterpart of getType(int). Per that wrapper, the values
    // returned here come from ICU and are renumbered before being exposed.
    @FastNative
    static native int getTypeImpl(int codePoint);
7131
7132    /**
7133     * Determines the character representation for a specific digit in
7134     * the specified radix. If the value of {@code radix} is not a
7135     * valid radix, or the value of {@code digit} is not a valid
7136     * digit in the specified radix, the null character
7137     * ({@code '\u005Cu0000'}) is returned.
7138     * <p>
7139     * The {@code radix} argument is valid if it is greater than or
7140     * equal to {@code MIN_RADIX} and less than or equal to
7141     * {@code MAX_RADIX}. The {@code digit} argument is valid if
7142     * {@code 0 <= digit < radix}.
7143     * <p>
7144     * If the digit is less than 10, then
7145     * {@code '0' + digit} is returned. Otherwise, the value
7146     * {@code 'a' + digit - 10} is returned.
7147     *
7148     * @param   digit   the number to convert to a character.
7149     * @param   radix   the radix.
7150     * @return  the {@code char} representation of the specified digit
7151     *          in the specified radix.
7152     * @see     Character#MIN_RADIX
7153     * @see     Character#MAX_RADIX
7154     * @see     Character#digit(char, int)
7155     */
7156    public static char forDigit(int digit, int radix) {
7157        if ((digit >= radix) || (digit < 0)) {
7158            return '\0';
7159        }
7160        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
7161            return '\0';
7162        }
7163        if (digit < 10) {
7164            return (char)('0' + digit);
7165        }
7166        return (char)('a' - 10 + digit);
7167    }
7168
7169    /**
7170     * Returns the Unicode directionality property for the given
7171     * character.  Character directionality is used to calculate the
7172     * visual ordering of text. The directionality value of undefined
7173     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7174     *
7175     * <p><b>Note:</b> This method cannot handle <a
7176     * href="#supplementary"> supplementary characters</a>. To support
7177     * all Unicode characters, including supplementary characters, use
7178     * the {@link #getDirectionality(int)} method.
7179     *
7180     * @param  ch {@code char} for which the directionality property
7181     *            is requested.
7182     * @return the directionality property of the {@code char} value.
7183     *
7184     * @see Character#DIRECTIONALITY_UNDEFINED
7185     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7186     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7187     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7188     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7189     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7190     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7191     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7192     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7193     * @see Character#DIRECTIONALITY_NONSPACING_MARK
7194     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7195     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7196     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7197     * @see Character#DIRECTIONALITY_WHITESPACE
7198     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7199     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7200     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7201     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7202     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7203     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7204     * @since 1.4
7205     */
7206    public static byte getDirectionality(char ch) {
7207        return getDirectionality((int)ch);
7208    }
7209
7210    /**
7211     * Returns the Unicode directionality property for the given
7212     * character (Unicode code point).  Character directionality is
7213     * used to calculate the visual ordering of text. The
7214     * directionality value of undefined character is {@link
7215     * #DIRECTIONALITY_UNDEFINED}.
7216     *
7217     * @param   codePoint the character (Unicode code point) for which
7218     *          the directionality property is requested.
7219     * @return the directionality property of the character.
7220     *
7221     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7222     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7223     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7224     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7225     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7226     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7227     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7228     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7229     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7230     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7231     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7232     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7233     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7234     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7235     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7236     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7237     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7238     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7239     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7240     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7241     * @since    1.5
7242     */
7243    public static byte getDirectionality(int codePoint) {
7244        if (getType(codePoint) == Character.UNASSIGNED) {
7245            return Character.DIRECTIONALITY_UNDEFINED;
7246        }
7247
7248        byte directionality = getDirectionalityImpl(codePoint);
7249        if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7250            return DIRECTIONALITY[directionality];
7251        }
7252        return Character.DIRECTIONALITY_UNDEFINED;
7253    }
7254
    /**
     * Native lookup of the raw directionality value for {@code codePoint}.
     * {@link #getDirectionality(int)} uses the result as an index into the
     * {@code DIRECTIONALITY} table and treats out-of-range values as undefined,
     * so the value returned here is not one of the public
     * {@code DIRECTIONALITY_*} constants.
     */
    @FastNative
    native static byte getDirectionalityImpl(int codePoint);
7257    /**
7258     * Determines whether the character is mirrored according to the
7259     * Unicode specification.  Mirrored characters should have their
7260     * glyphs horizontally mirrored when displayed in text that is
7261     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7262     * PARENTHESIS is semantically defined to be an <i>opening
7263     * parenthesis</i>.  This will appear as a "(" in text that is
7264     * left-to-right but as a ")" in text that is right-to-left.
7265     *
7266     * <p><b>Note:</b> This method cannot handle <a
7267     * href="#supplementary"> supplementary characters</a>. To support
7268     * all Unicode characters, including supplementary characters, use
7269     * the {@link #isMirrored(int)} method.
7270     *
7271     * @param  ch {@code char} for which the mirrored property is requested
7272     * @return {@code true} if the char is mirrored, {@code false}
7273     *         if the {@code char} is not mirrored or is not defined.
7274     * @since 1.4
7275     */
7276    public static boolean isMirrored(char ch) {
7277        return isMirrored((int)ch);
7278    }
7279
7280    /**
7281     * Determines whether the specified character (Unicode code point)
7282     * is mirrored according to the Unicode specification.  Mirrored
7283     * characters should have their glyphs horizontally mirrored when
7284     * displayed in text that is right-to-left.  For example,
7285     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7286     * defined to be an <i>opening parenthesis</i>.  This will appear
7287     * as a "(" in text that is left-to-right but as a ")" in text
7288     * that is right-to-left.
7289     *
7290     * @param   codePoint the character (Unicode code point) to be tested.
7291     * @return  {@code true} if the character is mirrored, {@code false}
7292     *          if the character is not mirrored or is not defined.
7293     * @since   1.5
7294     */
7295    public static boolean isMirrored(int codePoint) {
7296        return isMirroredImpl(codePoint);
7297    }
7298
    /**
     * Native lookup of the mirrored property for {@code codePoint}.
     * {@link #isMirrored(int)} returns this value unchanged.
     */
    @FastNative
    native static boolean isMirroredImpl(int codePoint);
7301    /**
7302     * Compares two {@code Character} objects numerically.
7303     *
7304     * @param   anotherCharacter   the {@code Character} to be compared.
7305
7306     * @return  the value {@code 0} if the argument {@code Character}
7307     *          is equal to this {@code Character}; a value less than
7308     *          {@code 0} if this {@code Character} is numerically less
7309     *          than the {@code Character} argument; and a value greater than
7310     *          {@code 0} if this {@code Character} is numerically greater
7311     *          than the {@code Character} argument (unsigned comparison).
7312     *          Note that this is strictly a numerical comparison; it is not
7313     *          locale-dependent.
7314     * @since   1.2
7315     */
7316    public int compareTo(Character anotherCharacter) {
7317        return compare(this.value, anotherCharacter.value);
7318    }
7319
7320    /**
7321     * Compares two {@code char} values numerically.
7322     * The value returned is identical to what would be returned by:
7323     * <pre>
7324     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7325     * </pre>
7326     *
7327     * @param  x the first {@code char} to compare
7328     * @param  y the second {@code char} to compare
7329     * @return the value {@code 0} if {@code x == y};
7330     *         a value less than {@code 0} if {@code x < y}; and
7331     *         a value greater than {@code 0} if {@code x > y}
7332     * @since 1.7
7333     */
7334    public static int compare(char x, char y) {
7335        return x - y;
7336    }
7337
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7345
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. Derived from {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7353
7354    /**
7355     * Returns the value obtained by reversing the order of the bytes in the
7356     * specified <tt>char</tt> value.
7357     *
7358     * @param ch The {@code char} of which to reverse the byte order.
7359     * @return the value obtained by reversing (or, equivalently, swapping)
7360     *     the bytes in the specified <tt>char</tt> value.
7361     * @since 1.5
7362     */
7363    public static char reverseBytes(char ch) {
7364        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7365    }
7366
7367    /**
7368     * Returns the Unicode name of the specified character
7369     * {@code codePoint}, or null if the code point is
7370     * {@link #UNASSIGNED unassigned}.
7371     * <p>
7372     * Note: if the specified character is not assigned a name by
7373     * the <i>UnicodeData</i> file (part of the Unicode Character
7374     * Database maintained by the Unicode Consortium), the returned
7375     * name is the same as the result of expression.
7376     *
7377     * <blockquote>{@code
7378     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7379     *     + " "
7380     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7381     *
7382     * }</blockquote>
7383     *
7384     * @param  codePoint the character (Unicode code point)
7385     *
7386     * @return the Unicode name of the specified character, or null if
7387     *         the code point is unassigned.
7388     *
7389     * @exception IllegalArgumentException if the specified
7390     *            {@code codePoint} is not a valid Unicode
7391     *            code point.
7392     *
7393     * @since 1.7
7394     */
7395    public static String getName(int codePoint) {
7396        if (!isValidCodePoint(codePoint)) {
7397            throw new IllegalArgumentException();
7398        }
7399        String name = getNameImpl(codePoint);
7400        if (name != null)
7401            return name;
7402        if (getType(codePoint) == UNASSIGNED)
7403            return null;
7404        UnicodeBlock block = UnicodeBlock.of(codePoint);
7405        if (block != null)
7406            return block.toString().replace('_', ' ') + " "
7407                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7408        // should never come here
7409        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7410    }
7411
    /**
     * Native lookup of the name for {@code codePoint}. {@link #getName(int)}
     * treats a {@code null} result as "no name available" and falls back to
     * either {@code null} (unassigned code points) or a synthesized name.
     */
    private static native String getNameImpl(int codePoint);
7413}
7414