Character.java revision fa5b565a3f6c6d7cbd6106ee8d360304c3a939a3
1/*
2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import dalvik.annotation.optimization.FastNative;
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.Locale;
32import java.util.Map;
33
34/**
35 * The {@code Character} class wraps a value of the primitive
36 * type {@code char} in an object. An object of type
37 * {@code Character} contains a single field whose type is
38 * {@code char}.
39 * <p>
40 * In addition, this class provides several methods for determining
41 * a character's category (lowercase letter, digit, etc.) and for converting
42 * characters from uppercase to lowercase and vice versa.
43 * <p>
44 * Character information is based on the Unicode Standard, version 6.2.0.
45 * <p>
46 * The methods and data of class {@code Character} are defined by
47 * the information in the <i>UnicodeData</i> file that is part of the
48 * Unicode Character Database maintained by the Unicode
49 * Consortium. This file specifies various properties including name
50 * and general category for every defined Unicode code point or
51 * character range.
52 * <p>
53 * The file and its description are available from the Unicode Consortium at:
54 * <ul>
55 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
56 * </ul>
57 *
58 * <h3><a name="unicode">Unicode Character Representations</a></h3>
59 *
60 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
62 * original Unicode specification, which defined characters as
63 * fixed-width 16-bit entities. The Unicode Standard has since been
64 * changed to allow for characters whose representation requires more
65 * than 16 bits.  The range of legal <em>code point</em>s is now
66 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
67 * (Refer to the <a
68 * href="http://www.unicode.org/reports/tr27/#notation"><i>
69 * definition</i></a> of the U+<i>n</i> notation in the Unicode
70 * Standard.)
71 *
72 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
73 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
74 * <a name="supplementary">Characters</a> whose code points are greater
75 * than U+FFFF are called <em>supplementary character</em>s.  The Java
76 * platform uses the UTF-16 representation in {@code char} arrays and
77 * in the {@code String} and {@code StringBuffer} classes. In
78 * this representation, supplementary characters are represented as a pair
79 * of {@code char} values, the first from the <em>high-surrogates</em>
80 * range, (&#92;uD800-&#92;uDBFF), the second from the
81 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
82 *
83 * <p>A {@code char} value, therefore, represents Basic
84 * Multilingual Plane (BMP) code points, including the surrogate
85 * code points, or code units of the UTF-16 encoding. An
86 * {@code int} value represents all Unicode code points,
87 * including supplementary code points. The lower (least significant)
88 * 21 bits of {@code int} are used to represent Unicode code
89 * points and the upper (most significant) 11 bits must be zero.
90 * Unless otherwise specified, the behavior with respect to
91 * supplementary characters and surrogate {@code char} values is
92 * as follows:
93 *
94 * <ul>
95 * <li>The methods that only accept a {@code char} value cannot support
96 * supplementary characters. They treat {@code char} values from the
97 * surrogate ranges as undefined characters. For example,
98 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 * this specific value, if followed by any low-surrogate value in a string,
 * would represent a letter.
101 *
102 * <li>The methods that accept an {@code int} value support all
103 * Unicode characters, including supplementary characters. For
104 * example, {@code Character.isLetter(0x2F81A)} returns
105 * {@code true} because the code point value represents a letter
106 * (a CJK ideograph).
107 * </ul>
108 *
109 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
110 * used for character values in the range between U+0000 and U+10FFFF,
111 * and <em>Unicode code unit</em> is used for 16-bit
112 * {@code char} values that are code units of the <em>UTF-16</em>
113 * encoding. For more information on Unicode terminology, refer to the
114 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
115 *
116 * @author  Lee Boynton
117 * @author  Guy Steele
118 * @author  Akira Tanaka
119 * @author  Martin Buchholz
120 * @author  Ulf Zibis
121 * @since   1.0
122 */
123public final
124class Character implements java.io.Serializable, Comparable<Character> {
125    /**
126     * The minimum radix available for conversion to and from strings.
127     * The constant value of this field is the smallest value permitted
128     * for the radix argument in radix-conversion methods such as the
129     * {@code digit} method, the {@code forDigit} method, and the
130     * {@code toString} method of class {@code Integer}.
131     *
132     * @see     Character#digit(char, int)
133     * @see     Character#forDigit(int, int)
134     * @see     Integer#toString(int, int)
135     * @see     Integer#valueOf(String)
136     */
137    public static final int MIN_RADIX = 2;
138
139    /**
140     * The maximum radix available for conversion to and from strings.
141     * The constant value of this field is the largest value permitted
142     * for the radix argument in radix-conversion methods such as the
143     * {@code digit} method, the {@code forDigit} method, and the
144     * {@code toString} method of class {@code Integer}.
145     *
146     * @see     Character#digit(char, int)
147     * @see     Character#forDigit(int, int)
148     * @see     Integer#toString(int, int)
149     * @see     Integer#valueOf(String)
150     */
151    public static final int MAX_RADIX = 36;
152
153    /**
154     * The constant value of this field is the smallest value of type
155     * {@code char}, {@code '\u005Cu0000'}.
156     *
157     * @since   1.0.2
158     */
159    public static final char MIN_VALUE = '\u0000';
160
161    /**
162     * The constant value of this field is the largest value of type
163     * {@code char}, {@code '\u005CuFFFF'}.
164     *
165     * @since   1.0.2
166     */
167    public static final char MAX_VALUE = '\uFFFF';
168
169    /**
170     * The {@code Class} instance representing the primitive type
171     * {@code char}.
172     *
173     * @since   1.1
174     */
175    @SuppressWarnings("unchecked")
176    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
177
178    /*
179     * Normative general types
180     */
181
182    /*
183     * General character types
184     */
185
186    /**
187     * General category "Cn" in the Unicode specification.
188     * @since   1.1
189     */
190    public static final byte UNASSIGNED = 0;
191
192    /**
193     * General category "Lu" in the Unicode specification.
194     * @since   1.1
195     */
196    public static final byte UPPERCASE_LETTER = 1;
197
198    /**
199     * General category "Ll" in the Unicode specification.
200     * @since   1.1
201     */
202    public static final byte LOWERCASE_LETTER = 2;
203
204    /**
205     * General category "Lt" in the Unicode specification.
206     * @since   1.1
207     */
208    public static final byte TITLECASE_LETTER = 3;
209
210    /**
211     * General category "Lm" in the Unicode specification.
212     * @since   1.1
213     */
214    public static final byte MODIFIER_LETTER = 4;
215
216    /**
217     * General category "Lo" in the Unicode specification.
218     * @since   1.1
219     */
220    public static final byte OTHER_LETTER = 5;
221
222    /**
223     * General category "Mn" in the Unicode specification.
224     * @since   1.1
225     */
226    public static final byte NON_SPACING_MARK = 6;
227
228    /**
229     * General category "Me" in the Unicode specification.
230     * @since   1.1
231     */
232    public static final byte ENCLOSING_MARK = 7;
233
234    /**
235     * General category "Mc" in the Unicode specification.
236     * @since   1.1
237     */
238    public static final byte COMBINING_SPACING_MARK = 8;
239
240    /**
241     * General category "Nd" in the Unicode specification.
242     * @since   1.1
243     */
244    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
245
246    /**
247     * General category "Nl" in the Unicode specification.
248     * @since   1.1
249     */
250    public static final byte LETTER_NUMBER = 10;
251
252    /**
253     * General category "No" in the Unicode specification.
254     * @since   1.1
255     */
256    public static final byte OTHER_NUMBER = 11;
257
258    /**
259     * General category "Zs" in the Unicode specification.
260     * @since   1.1
261     */
262    public static final byte SPACE_SEPARATOR = 12;
263
264    /**
265     * General category "Zl" in the Unicode specification.
266     * @since   1.1
267     */
268    public static final byte LINE_SEPARATOR = 13;
269
270    /**
271     * General category "Zp" in the Unicode specification.
272     * @since   1.1
273     */
274    public static final byte PARAGRAPH_SEPARATOR = 14;
275
276    /**
277     * General category "Cc" in the Unicode specification.
278     * @since   1.1
279     */
280    public static final byte CONTROL = 15;
281
282    /**
283     * General category "Cf" in the Unicode specification.
284     * @since   1.1
285     */
286    public static final byte FORMAT = 16;
287
288    /**
289     * General category "Co" in the Unicode specification.
290     * @since   1.1
291     */
292    public static final byte PRIVATE_USE = 18;
293
294    /**
295     * General category "Cs" in the Unicode specification.
296     * @since   1.1
297     */
298    public static final byte SURROGATE = 19;
299
300    /**
301     * General category "Pd" in the Unicode specification.
302     * @since   1.1
303     */
304    public static final byte DASH_PUNCTUATION = 20;
305
306    /**
307     * General category "Ps" in the Unicode specification.
308     * @since   1.1
309     */
310    public static final byte START_PUNCTUATION = 21;
311
312    /**
313     * General category "Pe" in the Unicode specification.
314     * @since   1.1
315     */
316    public static final byte END_PUNCTUATION = 22;
317
318    /**
319     * General category "Pc" in the Unicode specification.
320     * @since   1.1
321     */
322    public static final byte CONNECTOR_PUNCTUATION = 23;
323
324    /**
325     * General category "Po" in the Unicode specification.
326     * @since   1.1
327     */
328    public static final byte OTHER_PUNCTUATION = 24;
329
330    /**
331     * General category "Sm" in the Unicode specification.
332     * @since   1.1
333     */
334    public static final byte MATH_SYMBOL = 25;
335
336    /**
337     * General category "Sc" in the Unicode specification.
338     * @since   1.1
339     */
340    public static final byte CURRENCY_SYMBOL = 26;
341
342    /**
343     * General category "Sk" in the Unicode specification.
344     * @since   1.1
345     */
346    public static final byte MODIFIER_SYMBOL = 27;
347
348    /**
349     * General category "So" in the Unicode specification.
350     * @since   1.1
351     */
352    public static final byte OTHER_SYMBOL = 28;
353
354    /**
355     * General category "Pi" in the Unicode specification.
356     * @since   1.4
357     */
358    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
359
360    /**
361     * General category "Pf" in the Unicode specification.
362     * @since   1.4
363     */
364    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
365
366    /**
367     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
368     */
369    static final int ERROR = 0xFFFFFFFF;
370
371
372    /**
373     * Undefined bidirectional character type. Undefined {@code char}
374     * values have undefined directionality in the Unicode specification.
375     * @since 1.4
376     */
377    public static final byte DIRECTIONALITY_UNDEFINED = -1;
378
379    /**
380     * Strong bidirectional character type "L" in the Unicode specification.
381     * @since 1.4
382     */
383    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
384
385    /**
386     * Strong bidirectional character type "R" in the Unicode specification.
387     * @since 1.4
388     */
389    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
390
391    /**
     * Strong bidirectional character type "AL" in the Unicode specification.
393     * @since 1.4
394     */
395    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
396
397    /**
398     * Weak bidirectional character type "EN" in the Unicode specification.
399     * @since 1.4
400     */
401    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
402
403    /**
404     * Weak bidirectional character type "ES" in the Unicode specification.
405     * @since 1.4
406     */
407    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
408
409    /**
410     * Weak bidirectional character type "ET" in the Unicode specification.
411     * @since 1.4
412     */
413    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
414
415    /**
416     * Weak bidirectional character type "AN" in the Unicode specification.
417     * @since 1.4
418     */
419    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
420
421    /**
422     * Weak bidirectional character type "CS" in the Unicode specification.
423     * @since 1.4
424     */
425    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
426
427    /**
428     * Weak bidirectional character type "NSM" in the Unicode specification.
429     * @since 1.4
430     */
431    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
432
433    /**
434     * Weak bidirectional character type "BN" in the Unicode specification.
435     * @since 1.4
436     */
437    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
438
439    /**
440     * Neutral bidirectional character type "B" in the Unicode specification.
441     * @since 1.4
442     */
443    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
444
445    /**
446     * Neutral bidirectional character type "S" in the Unicode specification.
447     * @since 1.4
448     */
449    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
450
451    /**
452     * Neutral bidirectional character type "WS" in the Unicode specification.
453     * @since 1.4
454     */
455    public static final byte DIRECTIONALITY_WHITESPACE = 12;
456
457    /**
458     * Neutral bidirectional character type "ON" in the Unicode specification.
459     * @since 1.4
460     */
461    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
462
463    /**
464     * Strong bidirectional character type "LRE" in the Unicode specification.
465     * @since 1.4
466     */
467    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
468
469    /**
470     * Strong bidirectional character type "LRO" in the Unicode specification.
471     * @since 1.4
472     */
473    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
474
475    /**
476     * Strong bidirectional character type "RLE" in the Unicode specification.
477     * @since 1.4
478     */
479    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
480
481    /**
482     * Strong bidirectional character type "RLO" in the Unicode specification.
483     * @since 1.4
484     */
485    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
486
487    /**
488     * Weak bidirectional character type "PDF" in the Unicode specification.
489     * @since 1.4
490     */
491    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
492
493    /**
494     * The minimum value of a
495     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
496     * Unicode high-surrogate code unit</a>
497     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
498     * A high-surrogate is also known as a <i>leading-surrogate</i>.
499     *
500     * @since 1.5
501     */
502    public static final char MIN_HIGH_SURROGATE = '\uD800';
503
504    /**
505     * The maximum value of a
506     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
507     * Unicode high-surrogate code unit</a>
508     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
509     * A high-surrogate is also known as a <i>leading-surrogate</i>.
510     *
511     * @since 1.5
512     */
513    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
514
515    /**
516     * The minimum value of a
517     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
518     * Unicode low-surrogate code unit</a>
519     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
520     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
521     *
522     * @since 1.5
523     */
524    public static final char MIN_LOW_SURROGATE  = '\uDC00';
525
526    /**
527     * The maximum value of a
528     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
529     * Unicode low-surrogate code unit</a>
530     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
531     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
532     *
533     * @since 1.5
534     */
535    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
536
537    /**
538     * The minimum value of a Unicode surrogate code unit in the
539     * UTF-16 encoding, constant {@code '\u005CuD800'}.
540     *
541     * @since 1.5
542     */
543    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
544
545    /**
546     * The maximum value of a Unicode surrogate code unit in the
547     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
548     *
549     * @since 1.5
550     */
551    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
552
553    /**
554     * The minimum value of a
555     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
556     * Unicode supplementary code point</a>, constant {@code U+10000}.
557     *
558     * @since 1.5
559     */
560    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
561
562    /**
563     * The minimum value of a
564     * <a href="http://www.unicode.org/glossary/#code_point">
565     * Unicode code point</a>, constant {@code U+0000}.
566     *
567     * @since 1.5
568     */
569    public static final int MIN_CODE_POINT = 0x000000;
570
571    /**
572     * The maximum value of a
573     * <a href="http://www.unicode.org/glossary/#code_point">
574     * Unicode code point</a>, constant {@code U+10FFFF}.
575     *
576     * @since 1.5
577     */
578    public static final int MAX_CODE_POINT = 0X10FFFF;
579
580    private static final byte[] DIRECTIONALITY = new byte[] {
581            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
582            DIRECTIONALITY_EUROPEAN_NUMBER,
583            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
584            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
585            DIRECTIONALITY_ARABIC_NUMBER,
586            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
587            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
588            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
589            DIRECTIONALITY_OTHER_NEUTRALS,
590            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
591            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
592            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
593            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
594            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
595            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
596            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
597
598    /**
599     * Instances of this class represent particular subsets of the Unicode
600     * character set.  The only family of subsets defined in the
601     * {@code Character} class is {@link Character.UnicodeBlock}.
602     * Other portions of the Java API may define other subsets for their
603     * own purposes.
604     *
605     * @since 1.2
606     */
607    public static class Subset  {
608
609        private String name;
610
611        /**
612         * Constructs a new {@code Subset} instance.
613         *
614         * @param  name  The name of this subset
615         * @exception NullPointerException if name is {@code null}
616         */
617        protected Subset(String name) {
618            if (name == null) {
619                throw new NullPointerException("name");
620            }
621            this.name = name;
622        }
623
624        /**
625         * Compares two {@code Subset} objects for equality.
626         * This method returns {@code true} if and only if
627         * {@code this} and the argument refer to the same
628         * object; since this method is {@code final}, this
629         * guarantee holds for all subclasses.
630         */
631        public final boolean equals(Object obj) {
632            return (this == obj);
633        }
634
635        /**
636         * Returns the standard hash code as defined by the
637         * {@link Object#hashCode} method.  This method
638         * is {@code final} in order to ensure that the
639         * {@code equals} and {@code hashCode} methods will
640         * be consistent in all subclasses.
641         */
642        public final int hashCode() {
643            return super.hashCode();
644        }
645
646        /**
647         * Returns the name of this subset.
648         */
649        public final String toString() {
650            return name;
651        }
652    }
653
654    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
655    // for the latest specification of Unicode Blocks.
656
657    /**
658     * A family of character subsets representing the character blocks in the
659     * Unicode specification. Character blocks generally define characters
660     * used for a specific script or purpose. A character is contained by
661     * at most one Unicode block.
662     *
663     * @since 1.2
664     */
665    public static final class UnicodeBlock extends Subset {
666
667        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
668
669        /**
670         * Creates a UnicodeBlock with the given identifier name.
671         * This name must be the same as the block identifier.
672         */
673        private UnicodeBlock(String idName) {
674            this(idName, true);
675        }
676
677        private UnicodeBlock(String idName, boolean isMap) {
678            super(idName);
679            if (isMap) {
680                map.put(idName, this);
681            }
682        }
683
684        /**
685         * Creates a UnicodeBlock with the given identifier name and
686         * alias name.
687         */
688        private UnicodeBlock(String idName, String alias) {
689            this(idName);
690            map.put(alias, this);
691        }
692
693        /**
694         * Creates a UnicodeBlock with the given identifier name and
695         * alias names.
696         */
697        private UnicodeBlock(String idName, String... aliases) {
698            this(idName);
699            for (String alias : aliases)
700                map.put(alias, this);
701        }
702
703        /**
704         * Constant for the "Basic Latin" Unicode character block.
705         * @since 1.2
706         */
707        public static final UnicodeBlock  BASIC_LATIN =
708            new UnicodeBlock("BASIC_LATIN",
709                             "BASIC LATIN",
710                             "BASICLATIN");
711
712        /**
713         * Constant for the "Latin-1 Supplement" Unicode character block.
714         * @since 1.2
715         */
716        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
717            new UnicodeBlock("LATIN_1_SUPPLEMENT",
718                             "LATIN-1 SUPPLEMENT",
719                             "LATIN-1SUPPLEMENT");
720
721        /**
722         * Constant for the "Latin Extended-A" Unicode character block.
723         * @since 1.2
724         */
725        public static final UnicodeBlock LATIN_EXTENDED_A =
726            new UnicodeBlock("LATIN_EXTENDED_A",
727                             "LATIN EXTENDED-A",
728                             "LATINEXTENDED-A");
729
730        /**
731         * Constant for the "Latin Extended-B" Unicode character block.
732         * @since 1.2
733         */
734        public static final UnicodeBlock LATIN_EXTENDED_B =
735            new UnicodeBlock("LATIN_EXTENDED_B",
736                             "LATIN EXTENDED-B",
737                             "LATINEXTENDED-B");
738
739        /**
740         * Constant for the "IPA Extensions" Unicode character block.
741         * @since 1.2
742         */
743        public static final UnicodeBlock IPA_EXTENSIONS =
744            new UnicodeBlock("IPA_EXTENSIONS",
745                             "IPA EXTENSIONS",
746                             "IPAEXTENSIONS");
747
748        /**
749         * Constant for the "Spacing Modifier Letters" Unicode character block.
750         * @since 1.2
751         */
752        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
753            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
754                             "SPACING MODIFIER LETTERS",
755                             "SPACINGMODIFIERLETTERS");
756
757        /**
758         * Constant for the "Combining Diacritical Marks" Unicode character block.
759         * @since 1.2
760         */
761        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
762            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
763                             "COMBINING DIACRITICAL MARKS",
764                             "COMBININGDIACRITICALMARKS");
765
766        /**
767         * Constant for the "Greek and Coptic" Unicode character block.
768         * <p>
769         * This block was previously known as the "Greek" block.
770         *
771         * @since 1.2
772         */
773        public static final UnicodeBlock GREEK =
774            new UnicodeBlock("GREEK",
775                             "GREEK AND COPTIC",
776                             "GREEKANDCOPTIC");
777
778        /**
779         * Constant for the "Cyrillic" Unicode character block.
780         * @since 1.2
781         */
782        public static final UnicodeBlock CYRILLIC =
783            new UnicodeBlock("CYRILLIC");
784
785        /**
786         * Constant for the "Armenian" Unicode character block.
787         * @since 1.2
788         */
789        public static final UnicodeBlock ARMENIAN =
790            new UnicodeBlock("ARMENIAN");
791
792        /**
793         * Constant for the "Hebrew" Unicode character block.
794         * @since 1.2
795         */
796        public static final UnicodeBlock HEBREW =
797            new UnicodeBlock("HEBREW");
798
799        /**
800         * Constant for the "Arabic" Unicode character block.
801         * @since 1.2
802         */
803        public static final UnicodeBlock ARABIC =
804            new UnicodeBlock("ARABIC");
805
806        /**
807         * Constant for the "Devanagari" Unicode character block.
808         * @since 1.2
809         */
810        public static final UnicodeBlock DEVANAGARI =
811            new UnicodeBlock("DEVANAGARI");
812
813        /**
814         * Constant for the "Bengali" Unicode character block.
815         * @since 1.2
816         */
817        public static final UnicodeBlock BENGALI =
818            new UnicodeBlock("BENGALI");
819
820        /**
821         * Constant for the "Gurmukhi" Unicode character block.
822         * @since 1.2
823         */
824        public static final UnicodeBlock GURMUKHI =
825            new UnicodeBlock("GURMUKHI");
826
827        /**
828         * Constant for the "Gujarati" Unicode character block.
829         * @since 1.2
830         */
831        public static final UnicodeBlock GUJARATI =
832            new UnicodeBlock("GUJARATI");
833
834        /**
835         * Constant for the "Oriya" Unicode character block.
836         * @since 1.2
837         */
838        public static final UnicodeBlock ORIYA =
839            new UnicodeBlock("ORIYA");
840
841        /**
842         * Constant for the "Tamil" Unicode character block.
843         * @since 1.2
844         */
845        public static final UnicodeBlock TAMIL =
846            new UnicodeBlock("TAMIL");
847
848        /**
849         * Constant for the "Telugu" Unicode character block.
850         * @since 1.2
851         */
852        public static final UnicodeBlock TELUGU =
853            new UnicodeBlock("TELUGU");
854
855        /**
856         * Constant for the "Kannada" Unicode character block.
857         * @since 1.2
858         */
859        public static final UnicodeBlock KANNADA =
860            new UnicodeBlock("KANNADA");
861
862        /**
863         * Constant for the "Malayalam" Unicode character block.
864         * @since 1.2
865         */
866        public static final UnicodeBlock MALAYALAM =
867            new UnicodeBlock("MALAYALAM");
868
869        /**
870         * Constant for the "Thai" Unicode character block.
871         * @since 1.2
872         */
873        public static final UnicodeBlock THAI =
874            new UnicodeBlock("THAI");
875
876        /**
877         * Constant for the "Lao" Unicode character block.
878         * @since 1.2
879         */
880        public static final UnicodeBlock LAO =
881            new UnicodeBlock("LAO");
882
883        /**
884         * Constant for the "Tibetan" Unicode character block.
885         * @since 1.2
886         */
887        public static final UnicodeBlock TIBETAN =
888            new UnicodeBlock("TIBETAN");
889
890        /**
891         * Constant for the "Georgian" Unicode character block.
892         * @since 1.2
893         */
894        public static final UnicodeBlock GEORGIAN =
895            new UnicodeBlock("GEORGIAN");
896
897        /**
898         * Constant for the "Hangul Jamo" Unicode character block.
899         * @since 1.2
900         */
901        public static final UnicodeBlock HANGUL_JAMO =
902            new UnicodeBlock("HANGUL_JAMO",
903                             "HANGUL JAMO",
904                             "HANGULJAMO");
905
906        /**
907         * Constant for the "Latin Extended Additional" Unicode character block.
908         * @since 1.2
909         */
910        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
911            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
912                             "LATIN EXTENDED ADDITIONAL",
913                             "LATINEXTENDEDADDITIONAL");
914
915        /**
916         * Constant for the "Greek Extended" Unicode character block.
917         * @since 1.2
918         */
919        public static final UnicodeBlock GREEK_EXTENDED =
920            new UnicodeBlock("GREEK_EXTENDED",
921                             "GREEK EXTENDED",
922                             "GREEKEXTENDED");
923
924        /**
925         * Constant for the "General Punctuation" Unicode character block.
926         * @since 1.2
927         */
928        public static final UnicodeBlock GENERAL_PUNCTUATION =
929            new UnicodeBlock("GENERAL_PUNCTUATION",
930                             "GENERAL PUNCTUATION",
931                             "GENERALPUNCTUATION");
932
933        /**
934         * Constant for the "Superscripts and Subscripts" Unicode character
935         * block.
936         * @since 1.2
937         */
938        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
939            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
940                             "SUPERSCRIPTS AND SUBSCRIPTS",
941                             "SUPERSCRIPTSANDSUBSCRIPTS");
942
943        /**
944         * Constant for the "Currency Symbols" Unicode character block.
945         * @since 1.2
946         */
947        public static final UnicodeBlock CURRENCY_SYMBOLS =
948            new UnicodeBlock("CURRENCY_SYMBOLS",
949                             "CURRENCY SYMBOLS",
950                             "CURRENCYSYMBOLS");
951
952        /**
953         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
954         * character block.
955         * <p>
956         * This block was previously known as "Combining Marks for Symbols".
957         * @since 1.2
958         */
959        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
960            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
961                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
962                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
963                             "COMBINING MARKS FOR SYMBOLS",
964                             "COMBININGMARKSFORSYMBOLS");
965
966        /**
967         * Constant for the "Letterlike Symbols" Unicode character block.
968         * @since 1.2
969         */
970        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
971            new UnicodeBlock("LETTERLIKE_SYMBOLS",
972                             "LETTERLIKE SYMBOLS",
973                             "LETTERLIKESYMBOLS");
974
975        /**
976         * Constant for the "Number Forms" Unicode character block.
977         * @since 1.2
978         */
979        public static final UnicodeBlock NUMBER_FORMS =
980            new UnicodeBlock("NUMBER_FORMS",
981                             "NUMBER FORMS",
982                             "NUMBERFORMS");
983
984        /**
985         * Constant for the "Arrows" Unicode character block.
986         * @since 1.2
987         */
988        public static final UnicodeBlock ARROWS =
989            new UnicodeBlock("ARROWS");
990
991        /**
992         * Constant for the "Mathematical Operators" Unicode character block.
993         * @since 1.2
994         */
995        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
996            new UnicodeBlock("MATHEMATICAL_OPERATORS",
997                             "MATHEMATICAL OPERATORS",
998                             "MATHEMATICALOPERATORS");
999
1000        /**
1001         * Constant for the "Miscellaneous Technical" Unicode character block.
1002         * @since 1.2
1003         */
1004        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1005            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1006                             "MISCELLANEOUS TECHNICAL",
1007                             "MISCELLANEOUSTECHNICAL");
1008
1009        /**
1010         * Constant for the "Control Pictures" Unicode character block.
1011         * @since 1.2
1012         */
1013        public static final UnicodeBlock CONTROL_PICTURES =
1014            new UnicodeBlock("CONTROL_PICTURES",
1015                             "CONTROL PICTURES",
1016                             "CONTROLPICTURES");
1017
1018        /**
1019         * Constant for the "Optical Character Recognition" Unicode character block.
1020         * @since 1.2
1021         */
1022        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1023            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1024                             "OPTICAL CHARACTER RECOGNITION",
1025                             "OPTICALCHARACTERRECOGNITION");
1026
1027        /**
1028         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1029         * @since 1.2
1030         */
1031        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1032            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1033                             "ENCLOSED ALPHANUMERICS",
1034                             "ENCLOSEDALPHANUMERICS");
1035
1036        /**
1037         * Constant for the "Box Drawing" Unicode character block.
1038         * @since 1.2
1039         */
1040        public static final UnicodeBlock BOX_DRAWING =
1041            new UnicodeBlock("BOX_DRAWING",
1042                             "BOX DRAWING",
1043                             "BOXDRAWING");
1044
1045        /**
1046         * Constant for the "Block Elements" Unicode character block.
1047         * @since 1.2
1048         */
1049        public static final UnicodeBlock BLOCK_ELEMENTS =
1050            new UnicodeBlock("BLOCK_ELEMENTS",
1051                             "BLOCK ELEMENTS",
1052                             "BLOCKELEMENTS");
1053
1054        /**
1055         * Constant for the "Geometric Shapes" Unicode character block.
1056         * @since 1.2
1057         */
1058        public static final UnicodeBlock GEOMETRIC_SHAPES =
1059            new UnicodeBlock("GEOMETRIC_SHAPES",
1060                             "GEOMETRIC SHAPES",
1061                             "GEOMETRICSHAPES");
1062
1063        /**
1064         * Constant for the "Miscellaneous Symbols" Unicode character block.
1065         * @since 1.2
1066         */
1067        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1068            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1069                             "MISCELLANEOUS SYMBOLS",
1070                             "MISCELLANEOUSSYMBOLS");
1071
1072        /**
1073         * Constant for the "Dingbats" Unicode character block.
1074         * @since 1.2
1075         */
1076        public static final UnicodeBlock DINGBATS =
1077            new UnicodeBlock("DINGBATS");
1078
1079        /**
1080         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1081         * @since 1.2
1082         */
1083        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1084            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1085                             "CJK SYMBOLS AND PUNCTUATION",
1086                             "CJKSYMBOLSANDPUNCTUATION");
1087
1088        /**
1089         * Constant for the "Hiragana" Unicode character block.
1090         * @since 1.2
1091         */
1092        public static final UnicodeBlock HIRAGANA =
1093            new UnicodeBlock("HIRAGANA");
1094
1095        /**
1096         * Constant for the "Katakana" Unicode character block.
1097         * @since 1.2
1098         */
1099        public static final UnicodeBlock KATAKANA =
1100            new UnicodeBlock("KATAKANA");
1101
1102        /**
1103         * Constant for the "Bopomofo" Unicode character block.
1104         * @since 1.2
1105         */
1106        public static final UnicodeBlock BOPOMOFO =
1107            new UnicodeBlock("BOPOMOFO");
1108
1109        /**
1110         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1111         * @since 1.2
1112         */
1113        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1114            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1115                             "HANGUL COMPATIBILITY JAMO",
1116                             "HANGULCOMPATIBILITYJAMO");
1117
1118        /**
1119         * Constant for the "Kanbun" Unicode character block.
1120         * @since 1.2
1121         */
1122        public static final UnicodeBlock KANBUN =
1123            new UnicodeBlock("KANBUN");
1124
1125        /**
1126         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1127         * @since 1.2
1128         */
1129        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1130            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1131                             "ENCLOSED CJK LETTERS AND MONTHS",
1132                             "ENCLOSEDCJKLETTERSANDMONTHS");
1133
1134        /**
1135         * Constant for the "CJK Compatibility" Unicode character block.
1136         * @since 1.2
1137         */
1138        public static final UnicodeBlock CJK_COMPATIBILITY =
1139            new UnicodeBlock("CJK_COMPATIBILITY",
1140                             "CJK COMPATIBILITY",
1141                             "CJKCOMPATIBILITY");
1142
1143        /**
1144         * Constant for the "CJK Unified Ideographs" Unicode character block.
1145         * @since 1.2
1146         */
1147        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1148            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1149                             "CJK UNIFIED IDEOGRAPHS",
1150                             "CJKUNIFIEDIDEOGRAPHS");
1151
1152        /**
1153         * Constant for the "Hangul Syllables" Unicode character block.
1154         * @since 1.2
1155         */
1156        public static final UnicodeBlock HANGUL_SYLLABLES =
1157            new UnicodeBlock("HANGUL_SYLLABLES",
1158                             "HANGUL SYLLABLES",
1159                             "HANGULSYLLABLES");
1160
1161        /**
1162         * Constant for the "Private Use Area" Unicode character block.
1163         * @since 1.2
1164         */
1165        public static final UnicodeBlock PRIVATE_USE_AREA =
1166            new UnicodeBlock("PRIVATE_USE_AREA",
1167                             "PRIVATE USE AREA",
1168                             "PRIVATEUSEAREA");
1169
1170        /**
1171         * Constant for the "CJK Compatibility Ideographs" Unicode character
1172         * block.
1173         * @since 1.2
1174         */
1175        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1176            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1177                             "CJK COMPATIBILITY IDEOGRAPHS",
1178                             "CJKCOMPATIBILITYIDEOGRAPHS");
1179
1180        /**
1181         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1182         * @since 1.2
1183         */
1184        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1185            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1186                             "ALPHABETIC PRESENTATION FORMS",
1187                             "ALPHABETICPRESENTATIONFORMS");
1188
1189        /**
1190         * Constant for the "Arabic Presentation Forms-A" Unicode character
1191         * block.
1192         * @since 1.2
1193         */
1194        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1195            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1196                             "ARABIC PRESENTATION FORMS-A",
1197                             "ARABICPRESENTATIONFORMS-A");
1198
1199        /**
1200         * Constant for the "Combining Half Marks" Unicode character block.
1201         * @since 1.2
1202         */
1203        public static final UnicodeBlock COMBINING_HALF_MARKS =
1204            new UnicodeBlock("COMBINING_HALF_MARKS",
1205                             "COMBINING HALF MARKS",
1206                             "COMBININGHALFMARKS");
1207
1208        /**
1209         * Constant for the "CJK Compatibility Forms" Unicode character block.
1210         * @since 1.2
1211         */
1212        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1213            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1214                             "CJK COMPATIBILITY FORMS",
1215                             "CJKCOMPATIBILITYFORMS");
1216
1217        /**
1218         * Constant for the "Small Form Variants" Unicode character block.
1219         * @since 1.2
1220         */
1221        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1222            new UnicodeBlock("SMALL_FORM_VARIANTS",
1223                             "SMALL FORM VARIANTS",
1224                             "SMALLFORMVARIANTS");
1225
1226        /**
1227         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1228         * @since 1.2
1229         */
1230        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1231            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1232                             "ARABIC PRESENTATION FORMS-B",
1233                             "ARABICPRESENTATIONFORMS-B");
1234
1235        /**
1236         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1237         * block.
1238         * @since 1.2
1239         */
1240        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1241            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1242                             "HALFWIDTH AND FULLWIDTH FORMS",
1243                             "HALFWIDTHANDFULLWIDTHFORMS");
1244
1245        /**
1246         * Constant for the "Specials" Unicode character block.
1247         * @since 1.2
1248         */
1249        public static final UnicodeBlock SPECIALS =
1250            new UnicodeBlock("SPECIALS");
1251
1252        /**
1253         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1254         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1255         *             {@link #LOW_SURROGATES}. These new constants match
1256         *             the block definitions of the Unicode Standard.
1257         *             The {@link #of(char)} and {@link #of(int)} methods
1258         *             return the new constants, not SURROGATES_AREA.
1259         */
1260        @Deprecated
1261        public static final UnicodeBlock SURROGATES_AREA =
1262            new UnicodeBlock("SURROGATES_AREA", false);
1263
1264        /**
1265         * Constant for the "Syriac" Unicode character block.
1266         * @since 1.4
1267         */
1268        public static final UnicodeBlock SYRIAC =
1269            new UnicodeBlock("SYRIAC");
1270
1271        /**
1272         * Constant for the "Thaana" Unicode character block.
1273         * @since 1.4
1274         */
1275        public static final UnicodeBlock THAANA =
1276            new UnicodeBlock("THAANA");
1277
1278        /**
1279         * Constant for the "Sinhala" Unicode character block.
1280         * @since 1.4
1281         */
1282        public static final UnicodeBlock SINHALA =
1283            new UnicodeBlock("SINHALA");
1284
1285        /**
1286         * Constant for the "Myanmar" Unicode character block.
1287         * @since 1.4
1288         */
1289        public static final UnicodeBlock MYANMAR =
1290            new UnicodeBlock("MYANMAR");
1291
1292        /**
1293         * Constant for the "Ethiopic" Unicode character block.
1294         * @since 1.4
1295         */
1296        public static final UnicodeBlock ETHIOPIC =
1297            new UnicodeBlock("ETHIOPIC");
1298
1299        /**
1300         * Constant for the "Cherokee" Unicode character block.
1301         * @since 1.4
1302         */
1303        public static final UnicodeBlock CHEROKEE =
1304            new UnicodeBlock("CHEROKEE");
1305
1306        /**
1307         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1308         * @since 1.4
1309         */
1310        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1311            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1312                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1313                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1314
1315        /**
1316         * Constant for the "Ogham" Unicode character block.
1317         * @since 1.4
1318         */
1319        public static final UnicodeBlock OGHAM =
1320            new UnicodeBlock("OGHAM");
1321
1322        /**
1323         * Constant for the "Runic" Unicode character block.
1324         * @since 1.4
1325         */
1326        public static final UnicodeBlock RUNIC =
1327            new UnicodeBlock("RUNIC");
1328
1329        /**
1330         * Constant for the "Khmer" Unicode character block.
1331         * @since 1.4
1332         */
1333        public static final UnicodeBlock KHMER =
1334            new UnicodeBlock("KHMER");
1335
1336        /**
1337         * Constant for the "Mongolian" Unicode character block.
1338         * @since 1.4
1339         */
1340        public static final UnicodeBlock MONGOLIAN =
1341            new UnicodeBlock("MONGOLIAN");
1342
1343        /**
1344         * Constant for the "Braille Patterns" Unicode character block.
1345         * @since 1.4
1346         */
1347        public static final UnicodeBlock BRAILLE_PATTERNS =
1348            new UnicodeBlock("BRAILLE_PATTERNS",
1349                             "BRAILLE PATTERNS",
1350                             "BRAILLEPATTERNS");
1351
1352        /**
1353         * Constant for the "CJK Radicals Supplement" Unicode character block.
1354         * @since 1.4
1355         */
1356        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1357            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1358                             "CJK RADICALS SUPPLEMENT",
1359                             "CJKRADICALSSUPPLEMENT");
1360
1361        /**
1362         * Constant for the "Kangxi Radicals" Unicode character block.
1363         * @since 1.4
1364         */
1365        public static final UnicodeBlock KANGXI_RADICALS =
1366            new UnicodeBlock("KANGXI_RADICALS",
1367                             "KANGXI RADICALS",
1368                             "KANGXIRADICALS");
1369
1370        /**
1371         * Constant for the "Ideographic Description Characters" Unicode character block.
1372         * @since 1.4
1373         */
1374        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1375            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1376                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1377                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1378
1379        /**
1380         * Constant for the "Bopomofo Extended" Unicode character block.
1381         * @since 1.4
1382         */
1383        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1384            new UnicodeBlock("BOPOMOFO_EXTENDED",
1385                             "BOPOMOFO EXTENDED",
1386                             "BOPOMOFOEXTENDED");
1387
1388        /**
1389         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1390         * @since 1.4
1391         */
1392        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1393            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1394                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1395                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1396
1397        /**
1398         * Constant for the "Yi Syllables" Unicode character block.
1399         * @since 1.4
1400         */
1401        public static final UnicodeBlock YI_SYLLABLES =
1402            new UnicodeBlock("YI_SYLLABLES",
1403                             "YI SYLLABLES",
1404                             "YISYLLABLES");
1405
1406        /**
1407         * Constant for the "Yi Radicals" Unicode character block.
1408         * @since 1.4
1409         */
1410        public static final UnicodeBlock YI_RADICALS =
1411            new UnicodeBlock("YI_RADICALS",
1412                             "YI RADICALS",
1413                             "YIRADICALS");
1414
1415        /**
1416         * Constant for the "Cyrillic Supplementary" Unicode character block.
1417         * @since 1.5
1418         */
1419        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1420            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1421                             "CYRILLIC SUPPLEMENTARY",
1422                             "CYRILLICSUPPLEMENTARY",
1423                             "CYRILLIC SUPPLEMENT",
1424                             "CYRILLICSUPPLEMENT");
1425
1426        /**
1427         * Constant for the "Tagalog" Unicode character block.
1428         * @since 1.5
1429         */
1430        public static final UnicodeBlock TAGALOG =
1431            new UnicodeBlock("TAGALOG");
1432
1433        /**
1434         * Constant for the "Hanunoo" Unicode character block.
1435         * @since 1.5
1436         */
1437        public static final UnicodeBlock HANUNOO =
1438            new UnicodeBlock("HANUNOO");
1439
1440        /**
1441         * Constant for the "Buhid" Unicode character block.
1442         * @since 1.5
1443         */
1444        public static final UnicodeBlock BUHID =
1445            new UnicodeBlock("BUHID");
1446
1447        /**
1448         * Constant for the "Tagbanwa" Unicode character block.
1449         * @since 1.5
1450         */
1451        public static final UnicodeBlock TAGBANWA =
1452            new UnicodeBlock("TAGBANWA");
1453
1454        /**
1455         * Constant for the "Limbu" Unicode character block.
1456         * @since 1.5
1457         */
1458        public static final UnicodeBlock LIMBU =
1459            new UnicodeBlock("LIMBU");
1460
1461        /**
1462         * Constant for the "Tai Le" Unicode character block.
1463         * @since 1.5
1464         */
1465        public static final UnicodeBlock TAI_LE =
1466            new UnicodeBlock("TAI_LE",
1467                             "TAI LE",
1468                             "TAILE");
1469
1470        /**
1471         * Constant for the "Khmer Symbols" Unicode character block.
1472         * @since 1.5
1473         */
1474        public static final UnicodeBlock KHMER_SYMBOLS =
1475            new UnicodeBlock("KHMER_SYMBOLS",
1476                             "KHMER SYMBOLS",
1477                             "KHMERSYMBOLS");
1478
1479        /**
1480         * Constant for the "Phonetic Extensions" Unicode character block.
1481         * @since 1.5
1482         */
1483        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1484            new UnicodeBlock("PHONETIC_EXTENSIONS",
1485                             "PHONETIC EXTENSIONS",
1486                             "PHONETICEXTENSIONS");
1487
1488        /**
1489         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1490         * @since 1.5
1491         */
1492        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1493            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1494                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1495                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1496
1497        /**
1498         * Constant for the "Supplemental Arrows-A" Unicode character block.
1499         * @since 1.5
1500         */
1501        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1502            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1503                             "SUPPLEMENTAL ARROWS-A",
1504                             "SUPPLEMENTALARROWS-A");
1505
1506        /**
1507         * Constant for the "Supplemental Arrows-B" Unicode character block.
1508         * @since 1.5
1509         */
1510        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1511            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1512                             "SUPPLEMENTAL ARROWS-B",
1513                             "SUPPLEMENTALARROWS-B");
1514
1515        /**
1516         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1517         * character block.
1518         * @since 1.5
1519         */
1520        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1521            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1522                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1523                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1524
1525        /**
1526         * Constant for the "Supplemental Mathematical Operators" Unicode
1527         * character block.
1528         * @since 1.5
1529         */
1530        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1531            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1532                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1533                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1534
1535        /**
1536         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1537         * block.
1538         * @since 1.5
1539         */
1540        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1541            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1542                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1543                             "MISCELLANEOUSSYMBOLSANDARROWS");
1544
1545        /**
1546         * Constant for the "Katakana Phonetic Extensions" Unicode character
1547         * block.
1548         * @since 1.5
1549         */
1550        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1551            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1552                             "KATAKANA PHONETIC EXTENSIONS",
1553                             "KATAKANAPHONETICEXTENSIONS");
1554
1555        /**
1556         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1557         * @since 1.5
1558         */
1559        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1560            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1561                             "YIJING HEXAGRAM SYMBOLS",
1562                             "YIJINGHEXAGRAMSYMBOLS");
1563
1564        /**
1565         * Constant for the "Variation Selectors" Unicode character block.
1566         * @since 1.5
1567         */
1568        public static final UnicodeBlock VARIATION_SELECTORS =
1569            new UnicodeBlock("VARIATION_SELECTORS",
1570                             "VARIATION SELECTORS",
1571                             "VARIATIONSELECTORS");
1572
1573        /**
1574         * Constant for the "Linear B Syllabary" Unicode character block.
1575         * @since 1.5
1576         */
1577        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1578            new UnicodeBlock("LINEAR_B_SYLLABARY",
1579                             "LINEAR B SYLLABARY",
1580                             "LINEARBSYLLABARY");
1581
1582        /**
1583         * Constant for the "Linear B Ideograms" Unicode character block.
1584         * @since 1.5
1585         */
1586        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1587            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1588                             "LINEAR B IDEOGRAMS",
1589                             "LINEARBIDEOGRAMS");
1590
1591        /**
1592         * Constant for the "Aegean Numbers" Unicode character block.
1593         * @since 1.5
1594         */
1595        public static final UnicodeBlock AEGEAN_NUMBERS =
1596            new UnicodeBlock("AEGEAN_NUMBERS",
1597                             "AEGEAN NUMBERS",
1598                             "AEGEANNUMBERS");
1599
1600        /**
1601         * Constant for the "Old Italic" Unicode character block.
1602         * @since 1.5
1603         */
1604        public static final UnicodeBlock OLD_ITALIC =
1605            new UnicodeBlock("OLD_ITALIC",
1606                             "OLD ITALIC",
1607                             "OLDITALIC");
1608
1609        /**
1610         * Constant for the "Gothic" Unicode character block.
1611         * @since 1.5
1612         */
1613        public static final UnicodeBlock GOTHIC =
1614            new UnicodeBlock("GOTHIC");
1615
1616        /**
1617         * Constant for the "Ugaritic" Unicode character block.
1618         * @since 1.5
1619         */
1620        public static final UnicodeBlock UGARITIC =
1621            new UnicodeBlock("UGARITIC");
1622
1623        /**
1624         * Constant for the "Deseret" Unicode character block.
1625         * @since 1.5
1626         */
1627        public static final UnicodeBlock DESERET =
1628            new UnicodeBlock("DESERET");
1629
1630        /**
1631         * Constant for the "Shavian" Unicode character block.
1632         * @since 1.5
1633         */
1634        public static final UnicodeBlock SHAVIAN =
1635            new UnicodeBlock("SHAVIAN");
1636
1637        /**
1638         * Constant for the "Osmanya" Unicode character block.
1639         * @since 1.5
1640         */
1641        public static final UnicodeBlock OSMANYA =
1642            new UnicodeBlock("OSMANYA");
1643
1644        /**
1645         * Constant for the "Cypriot Syllabary" Unicode character block.
1646         * @since 1.5
1647         */
1648        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1649            new UnicodeBlock("CYPRIOT_SYLLABARY",
1650                             "CYPRIOT SYLLABARY",
1651                             "CYPRIOTSYLLABARY");
1652
1653        /**
1654         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1655         * @since 1.5
1656         */
1657        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1658            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1659                             "BYZANTINE MUSICAL SYMBOLS",
1660                             "BYZANTINEMUSICALSYMBOLS");
1661
1662        /**
1663         * Constant for the "Musical Symbols" Unicode character block.
1664         * @since 1.5
1665         */
1666        public static final UnicodeBlock MUSICAL_SYMBOLS =
1667            new UnicodeBlock("MUSICAL_SYMBOLS",
1668                             "MUSICAL SYMBOLS",
1669                             "MUSICALSYMBOLS");
1670
1671        /**
1672         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1673         * @since 1.5
1674         */
1675        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1676            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1677                             "TAI XUAN JING SYMBOLS",
1678                             "TAIXUANJINGSYMBOLS");
1679
1680        /**
1681         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1682         * character block.
1683         * @since 1.5
1684         */
1685        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1686            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1687                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1688                             "MATHEMATICALALPHANUMERICSYMBOLS");
1689
1690        /**
1691         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1692         * character block.
1693         * @since 1.5
1694         */
1695        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1696            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1697                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1698                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // args: field name, spaced spelling, unspaced spelling
1699
1700        /**
1701         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1702         * @since 1.5
1703         */
1704        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1705            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1706                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1707                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1708
1709        /**
1710         * Constant for the "Tags" Unicode character block.
1711         * @since 1.5
1712         */
1713        public static final UnicodeBlock TAGS =
1714            new UnicodeBlock("TAGS");  // sole argument repeats the field name
1715
1716        /**
1717         * Constant for the "Variation Selectors Supplement" Unicode character
1718         * block.
1719         * @since 1.5
1720         */
1721        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1722            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1723                             "VARIATION SELECTORS SUPPLEMENT",
1724                             "VARIATIONSELECTORSSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1725
1726        /**
1727         * Constant for the "Supplementary Private Use Area-A" Unicode character
1728         * block.
1729         * @since 1.5
1730         */
1731        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1732            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1733                             "SUPPLEMENTARY PRIVATE USE AREA-A",
1734                             "SUPPLEMENTARYPRIVATEUSEAREA-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
1735
1736        /**
1737         * Constant for the "Supplementary Private Use Area-B" Unicode character
1738         * block.
1739         * @since 1.5
1740         */
1741        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1742            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1743                             "SUPPLEMENTARY PRIVATE USE AREA-B",
1744                             "SUPPLEMENTARYPRIVATEUSEAREA-B");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
1745
1746        /**
1747         * Constant for the "High Surrogates" Unicode character block.
1748         * This block represents code point values in the high surrogate
1749         * range: U+D800 through U+DB7F.
1750         *
1751         * @since 1.5
1752         */
1753        public static final UnicodeBlock HIGH_SURROGATES =
1754            new UnicodeBlock("HIGH_SURROGATES",
1755                             "HIGH SURROGATES",
1756                             "HIGHSURROGATES");  // args: field name, spaced spelling, unspaced spelling
1757
1758        /**
1759         * Constant for the "High Private Use Surrogates" Unicode character
1760         * block.
1761         * This block represents code point values in the private use high
1762         * surrogate range: U+DB80 through U+DBFF.
1763         *
1764         * @since 1.5
1765         */
1766        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1767            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1768                             "HIGH PRIVATE USE SURROGATES",
1769                             "HIGHPRIVATEUSESURROGATES");  // args: field name, spaced spelling, unspaced spelling
1770
1771        /**
1772         * Constant for the "Low Surrogates" Unicode character block.
1773         * This block represents code point values in the low surrogate
1774         * range: U+DC00 through U+DFFF.
1775         *
1776         * @since 1.5
1777         */
1778        public static final UnicodeBlock LOW_SURROGATES =
1779            new UnicodeBlock("LOW_SURROGATES",
1780                             "LOW SURROGATES",
1781                             "LOWSURROGATES");  // args: field name, spaced spelling, unspaced spelling
1782
1783        /**
1784         * Constant for the "Arabic Supplement" Unicode character block.
1785         * @since 1.7
1786         */
1787        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1788            new UnicodeBlock("ARABIC_SUPPLEMENT",
1789                             "ARABIC SUPPLEMENT",
1790                             "ARABICSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1791
1792        /**
1793         * Constant for the "NKo" Unicode character block.
1794         * @since 1.7
1795         */
1796        public static final UnicodeBlock NKO =
1797            new UnicodeBlock("NKO");  // sole argument repeats the field name
1798
1799        /**
1800         * Constant for the "Samaritan" Unicode character block.
1801         * @since 1.7
1802         */
1803        public static final UnicodeBlock SAMARITAN =
1804            new UnicodeBlock("SAMARITAN");  // sole argument repeats the field name
1805
1806        /**
1807         * Constant for the "Mandaic" Unicode character block.
1808         * @since 1.7
1809         */
1810        public static final UnicodeBlock MANDAIC =
1811            new UnicodeBlock("MANDAIC");  // sole argument repeats the field name
1812
1813        /**
1814         * Constant for the "Ethiopic Supplement" Unicode character block.
1815         * @since 1.7
1816         */
1817        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1818            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1819                             "ETHIOPIC SUPPLEMENT",
1820                             "ETHIOPICSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1821
1822        /**
1823         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1824         * Unicode character block.
1825         * @since 1.7
1826         */
1827        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1828            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1829                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1830                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // args: field name, spaced spelling, unspaced spelling
1831
1832        /**
1833         * Constant for the "New Tai Lue" Unicode character block.
1834         * @since 1.7
1835         */
1836        public static final UnicodeBlock NEW_TAI_LUE =
1837            new UnicodeBlock("NEW_TAI_LUE",
1838                             "NEW TAI LUE",
1839                             "NEWTAILUE");  // args: field name, spaced spelling, unspaced spelling
1840
1841        /**
1842         * Constant for the "Buginese" Unicode character block.
1843         * @since 1.7
1844         */
1845        public static final UnicodeBlock BUGINESE =
1846            new UnicodeBlock("BUGINESE");  // sole argument repeats the field name
1847
1848        /**
1849         * Constant for the "Tai Tham" Unicode character block.
1850         * @since 1.7
1851         */
1852        public static final UnicodeBlock TAI_THAM =
1853            new UnicodeBlock("TAI_THAM",
1854                             "TAI THAM",
1855                             "TAITHAM");  // args: field name, spaced spelling, unspaced spelling
1856
1857        /**
1858         * Constant for the "Balinese" Unicode character block.
1859         * @since 1.7
1860         */
1861        public static final UnicodeBlock BALINESE =
1862            new UnicodeBlock("BALINESE");  // sole argument repeats the field name
1863
1864        /**
1865         * Constant for the "Sundanese" Unicode character block.
1866         * @since 1.7
1867         */
1868        public static final UnicodeBlock SUNDANESE =
1869            new UnicodeBlock("SUNDANESE");  // sole argument repeats the field name
1870
1871        /**
1872         * Constant for the "Batak" Unicode character block.
1873         * @since 1.7
1874         */
1875        public static final UnicodeBlock BATAK =
1876            new UnicodeBlock("BATAK");  // sole argument repeats the field name
1877
1878        /**
1879         * Constant for the "Lepcha" Unicode character block.
1880         * @since 1.7
1881         */
1882        public static final UnicodeBlock LEPCHA =
1883            new UnicodeBlock("LEPCHA");  // sole argument repeats the field name
1884
1885        /**
1886         * Constant for the "Ol Chiki" Unicode character block.
1887         * @since 1.7
1888         */
1889        public static final UnicodeBlock OL_CHIKI =
1890            new UnicodeBlock("OL_CHIKI",
1891                             "OL CHIKI",
1892                             "OLCHIKI");  // args: field name, spaced spelling, unspaced spelling
1893
1894        /**
1895         * Constant for the "Vedic Extensions" Unicode character block.
1896         * @since 1.7
1897         */
1898        public static final UnicodeBlock VEDIC_EXTENSIONS =
1899            new UnicodeBlock("VEDIC_EXTENSIONS",
1900                             "VEDIC EXTENSIONS",
1901                             "VEDICEXTENSIONS");  // args: field name, spaced spelling, unspaced spelling
1902
1903        /**
1904         * Constant for the "Phonetic Extensions Supplement" Unicode character
1905         * block.
1906         * @since 1.7
1907         */
1908        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1909            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1910                             "PHONETIC EXTENSIONS SUPPLEMENT",
1911                             "PHONETICEXTENSIONSSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1912
1913        /**
1914         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1915         * character block.
1916         * @since 1.7
1917         */
1918        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1919            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1920                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1921                             "COMBININGDIACRITICALMARKSSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1922
1923        /**
1924         * Constant for the "Glagolitic" Unicode character block.
1925         * @since 1.7
1926         */
1927        public static final UnicodeBlock GLAGOLITIC =
1928            new UnicodeBlock("GLAGOLITIC");  // sole argument repeats the field name
1929
1930        /**
1931         * Constant for the "Latin Extended-C" Unicode character block.
1932         * @since 1.7
1933         */
1934        public static final UnicodeBlock LATIN_EXTENDED_C =
1935            new UnicodeBlock("LATIN_EXTENDED_C",
1936                             "LATIN EXTENDED-C",
1937                             "LATINEXTENDED-C");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
1938
1939        /**
1940         * Constant for the "Coptic" Unicode character block.
1941         * @since 1.7
1942         */
1943        public static final UnicodeBlock COPTIC =
1944            new UnicodeBlock("COPTIC");  // sole argument repeats the field name
1945
1946        /**
1947         * Constant for the "Georgian Supplement" Unicode character block.
1948         * @since 1.7
1949         */
1950        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1951            new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1952                             "GEORGIAN SUPPLEMENT",
1953                             "GEORGIANSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
1954
1955        /**
1956         * Constant for the "Tifinagh" Unicode character block.
1957         * @since 1.7
1958         */
1959        public static final UnicodeBlock TIFINAGH =
1960            new UnicodeBlock("TIFINAGH");  // sole argument repeats the field name
1961
1962        /**
1963         * Constant for the "Ethiopic Extended" Unicode character block.
1964         * @since 1.7
1965         */
1966        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1967            new UnicodeBlock("ETHIOPIC_EXTENDED",
1968                             "ETHIOPIC EXTENDED",
1969                             "ETHIOPICEXTENDED");  // args: field name, spaced spelling, unspaced spelling
1970
1971        /**
1972         * Constant for the "Cyrillic Extended-A" Unicode character block.
1973         * @since 1.7
1974         */
1975        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1976            new UnicodeBlock("CYRILLIC_EXTENDED_A",
1977                             "CYRILLIC EXTENDED-A",
1978                             "CYRILLICEXTENDED-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
1979
1980        /**
1981         * Constant for the "Supplemental Punctuation" Unicode character block.
1982         * @since 1.7
1983         */
1984        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1985            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1986                             "SUPPLEMENTAL PUNCTUATION",
1987                             "SUPPLEMENTALPUNCTUATION");  // args: field name, spaced spelling, unspaced spelling
1988
1989        /**
1990         * Constant for the "CJK Strokes" Unicode character block.
1991         * @since 1.7
1992         */
1993        public static final UnicodeBlock CJK_STROKES =
1994            new UnicodeBlock("CJK_STROKES",
1995                             "CJK STROKES",
1996                             "CJKSTROKES");  // args: field name, spaced spelling, unspaced spelling
1997
1998        /**
1999         * Constant for the "Lisu" Unicode character block.
2000         * @since 1.7
2001         */
2002        public static final UnicodeBlock LISU =
2003            new UnicodeBlock("LISU");  // sole argument repeats the field name
2004
2005        /**
2006         * Constant for the "Vai" Unicode character block.
2007         * @since 1.7
2008         */
2009        public static final UnicodeBlock VAI =
2010            new UnicodeBlock("VAI");  // sole argument repeats the field name
2011
2012        /**
2013         * Constant for the "Cyrillic Extended-B" Unicode character block.
2014         * @since 1.7
2015         */
2016        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2017            new UnicodeBlock("CYRILLIC_EXTENDED_B",
2018                             "CYRILLIC EXTENDED-B",
2019                             "CYRILLICEXTENDED-B");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2020
2021        /**
2022         * Constant for the "Bamum" Unicode character block.
2023         * @since 1.7
2024         */
2025        public static final UnicodeBlock BAMUM =
2026            new UnicodeBlock("BAMUM");  // sole argument repeats the field name
2027
2028        /**
2029         * Constant for the "Modifier Tone Letters" Unicode character block.
2030         * @since 1.7
2031         */
2032        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2033            new UnicodeBlock("MODIFIER_TONE_LETTERS",
2034                             "MODIFIER TONE LETTERS",
2035                             "MODIFIERTONELETTERS");  // args: field name, spaced spelling, unspaced spelling
2036
2037        /**
2038         * Constant for the "Latin Extended-D" Unicode character block.
2039         * @since 1.7
2040         */
2041        public static final UnicodeBlock LATIN_EXTENDED_D =
2042            new UnicodeBlock("LATIN_EXTENDED_D",
2043                             "LATIN EXTENDED-D",
2044                             "LATINEXTENDED-D");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2045
2046        /**
2047         * Constant for the "Syloti Nagri" Unicode character block.
2048         * @since 1.7
2049         */
2050        public static final UnicodeBlock SYLOTI_NAGRI =
2051            new UnicodeBlock("SYLOTI_NAGRI",
2052                             "SYLOTI NAGRI",
2053                             "SYLOTINAGRI");  // args: field name, spaced spelling, unspaced spelling
2054
2055        /**
2056         * Constant for the "Common Indic Number Forms" Unicode character block.
2057         * @since 1.7
2058         */
2059        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2060            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2061                             "COMMON INDIC NUMBER FORMS",
2062                             "COMMONINDICNUMBERFORMS");  // args: field name, spaced spelling, unspaced spelling
2063
2064        /**
2065         * Constant for the "Phags-pa" Unicode character block.
2066         * @since 1.7
2067         */
2068        public static final UnicodeBlock PHAGS_PA =
2069            new UnicodeBlock("PHAGS_PA",
2070                             "PHAGS-PA");  // args: field name, hyphenated spelling (only two strings here)
2071
2072        /**
2073         * Constant for the "Saurashtra" Unicode character block.
2074         * @since 1.7
2075         */
2076        public static final UnicodeBlock SAURASHTRA =
2077            new UnicodeBlock("SAURASHTRA");  // sole argument repeats the field name
2078
2079        /**
2080         * Constant for the "Devanagari Extended" Unicode character block.
2081         * @since 1.7
2082         */
2083        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2084            new UnicodeBlock("DEVANAGARI_EXTENDED",
2085                             "DEVANAGARI EXTENDED",
2086                             "DEVANAGARIEXTENDED");  // args: field name, spaced spelling, unspaced spelling
2087
2088        /**
2089         * Constant for the "Kayah Li" Unicode character block.
2090         * @since 1.7
2091         */
2092        public static final UnicodeBlock KAYAH_LI =
2093            new UnicodeBlock("KAYAH_LI",
2094                             "KAYAH LI",
2095                             "KAYAHLI");  // args: field name, spaced spelling, unspaced spelling
2096
2097        /**
2098         * Constant for the "Rejang" Unicode character block.
2099         * @since 1.7
2100         */
2101        public static final UnicodeBlock REJANG =
2102            new UnicodeBlock("REJANG");  // sole argument repeats the field name
2103
2104        /**
2105         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2106         * @since 1.7
2107         */
2108        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2109            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2110                             "HANGUL JAMO EXTENDED-A",
2111                             "HANGULJAMOEXTENDED-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2112
2113        /**
2114         * Constant for the "Javanese" Unicode character block.
2115         * @since 1.7
2116         */
2117        public static final UnicodeBlock JAVANESE =
2118            new UnicodeBlock("JAVANESE");  // sole argument repeats the field name
2119
2120        /**
2121         * Constant for the "Cham" Unicode character block.
2122         * @since 1.7
2123         */
2124        public static final UnicodeBlock CHAM =
2125            new UnicodeBlock("CHAM");  // sole argument repeats the field name
2126
2127        /**
2128         * Constant for the "Myanmar Extended-A" Unicode character block.
2129         * @since 1.7
2130         */
2131        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2132            new UnicodeBlock("MYANMAR_EXTENDED_A",
2133                             "MYANMAR EXTENDED-A",
2134                             "MYANMAREXTENDED-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2135
2136        /**
2137         * Constant for the "Tai Viet" Unicode character block.
2138         * @since 1.7
2139         */
2140        public static final UnicodeBlock TAI_VIET =
2141            new UnicodeBlock("TAI_VIET",
2142                             "TAI VIET",
2143                             "TAIVIET");  // args: field name, spaced spelling, unspaced spelling
2144
2145        /**
2146         * Constant for the "Ethiopic Extended-A" Unicode character block.
2147         * @since 1.7
2148         */
2149        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2150            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2151                             "ETHIOPIC EXTENDED-A",
2152                             "ETHIOPICEXTENDED-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2153
2154        /**
2155         * Constant for the "Meetei Mayek" Unicode character block.
2156         * @since 1.7
2157         */
2158        public static final UnicodeBlock MEETEI_MAYEK =
2159            new UnicodeBlock("MEETEI_MAYEK",
2160                             "MEETEI MAYEK",
2161                             "MEETEIMAYEK");  // args: field name, spaced spelling, unspaced spelling
2162
2163        /**
2164         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2165         * @since 1.7
2166         */
2167        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2168            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2169                             "HANGUL JAMO EXTENDED-B",
2170                             "HANGULJAMOEXTENDED-B");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2171
2172        /**
2173         * Constant for the "Vertical Forms" Unicode character block.
2174         * @since 1.7
2175         */
2176        public static final UnicodeBlock VERTICAL_FORMS =
2177            new UnicodeBlock("VERTICAL_FORMS",
2178                             "VERTICAL FORMS",
2179                             "VERTICALFORMS");  // args: field name, spaced spelling, unspaced spelling
2180
2181        /**
2182         * Constant for the "Ancient Greek Numbers" Unicode character block.
2183         * @since 1.7
2184         */
2185        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2186            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2187                             "ANCIENT GREEK NUMBERS",
2188                             "ANCIENTGREEKNUMBERS");  // args: field name, spaced spelling, unspaced spelling
2189
2190        /**
2191         * Constant for the "Ancient Symbols" Unicode character block.
2192         * @since 1.7
2193         */
2194        public static final UnicodeBlock ANCIENT_SYMBOLS =
2195            new UnicodeBlock("ANCIENT_SYMBOLS",
2196                             "ANCIENT SYMBOLS",
2197                             "ANCIENTSYMBOLS");  // args: field name, spaced spelling, unspaced spelling
2198
2199        /**
2200         * Constant for the "Phaistos Disc" Unicode character block.
2201         * @since 1.7
2202         */
2203        public static final UnicodeBlock PHAISTOS_DISC =
2204            new UnicodeBlock("PHAISTOS_DISC",
2205                             "PHAISTOS DISC",
2206                             "PHAISTOSDISC");  // args: field name, spaced spelling, unspaced spelling
2207
2208        /**
2209         * Constant for the "Lycian" Unicode character block.
2210         * @since 1.7
2211         */
2212        public static final UnicodeBlock LYCIAN =
2213            new UnicodeBlock("LYCIAN");  // sole argument repeats the field name
2214
2215        /**
2216         * Constant for the "Carian" Unicode character block.
2217         * @since 1.7
2218         */
2219        public static final UnicodeBlock CARIAN =
2220            new UnicodeBlock("CARIAN");  // sole argument repeats the field name
2221
2222        /**
2223         * Constant for the "Old Persian" Unicode character block.
2224         * @since 1.7
2225         */
2226        public static final UnicodeBlock OLD_PERSIAN =
2227            new UnicodeBlock("OLD_PERSIAN",
2228                             "OLD PERSIAN",
2229                             "OLDPERSIAN");  // args: field name, spaced spelling, unspaced spelling
2230
2231        /**
2232         * Constant for the "Imperial Aramaic" Unicode character block.
2233         * @since 1.7
2234         */
2235        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2236            new UnicodeBlock("IMPERIAL_ARAMAIC",
2237                             "IMPERIAL ARAMAIC",
2238                             "IMPERIALARAMAIC");  // args: field name, spaced spelling, unspaced spelling
2239
2240        /**
2241         * Constant for the "Phoenician" Unicode character block.
2242         * @since 1.7
2243         */
2244        public static final UnicodeBlock PHOENICIAN =
2245            new UnicodeBlock("PHOENICIAN");  // sole argument repeats the field name
2246
2247        /**
2248         * Constant for the "Lydian" Unicode character block.
2249         * @since 1.7
2250         */
2251        public static final UnicodeBlock LYDIAN =
2252            new UnicodeBlock("LYDIAN");  // sole argument repeats the field name
2253
2254        /**
2255         * Constant for the "Kharoshthi" Unicode character block.
2256         * @since 1.7
2257         */
2258        public static final UnicodeBlock KHAROSHTHI =
2259            new UnicodeBlock("KHAROSHTHI");  // sole argument repeats the field name
2260
2261        /**
2262         * Constant for the "Old South Arabian" Unicode character block.
2263         * @since 1.7
2264         */
2265        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2266            new UnicodeBlock("OLD_SOUTH_ARABIAN",
2267                             "OLD SOUTH ARABIAN",
2268                             "OLDSOUTHARABIAN");  // args: field name, spaced spelling, unspaced spelling
2269
2270        /**
2271         * Constant for the "Avestan" Unicode character block.
2272         * @since 1.7
2273         */
2274        public static final UnicodeBlock AVESTAN =
2275            new UnicodeBlock("AVESTAN");  // sole argument repeats the field name
2276
2277        /**
2278         * Constant for the "Inscriptional Parthian" Unicode character block.
2279         * @since 1.7
2280         */
2281        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2282            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2283                             "INSCRIPTIONAL PARTHIAN",
2284                             "INSCRIPTIONALPARTHIAN");  // args: field name, spaced spelling, unspaced spelling
2285
2286        /**
2287         * Constant for the "Inscriptional Pahlavi" Unicode character block.
2288         * @since 1.7
2289         */
2290        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2291            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2292                             "INSCRIPTIONAL PAHLAVI",
2293                             "INSCRIPTIONALPAHLAVI");  // args: field name, spaced spelling, unspaced spelling
2294
2295        /**
2296         * Constant for the "Old Turkic" Unicode character block.
2297         * @since 1.7
2298         */
2299        public static final UnicodeBlock OLD_TURKIC =
2300            new UnicodeBlock("OLD_TURKIC",
2301                             "OLD TURKIC",
2302                             "OLDTURKIC");  // args: field name, spaced spelling, unspaced spelling
2303
2304        /**
2305         * Constant for the "Rumi Numeral Symbols" Unicode character block.
2306         * @since 1.7
2307         */
2308        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2309            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2310                             "RUMI NUMERAL SYMBOLS",
2311                             "RUMINUMERALSYMBOLS");  // args: field name, spaced spelling, unspaced spelling
2312
2313        /**
2314         * Constant for the "Brahmi" Unicode character block.
2315         * @since 1.7
2316         */
2317        public static final UnicodeBlock BRAHMI =
2318            new UnicodeBlock("BRAHMI");  // sole argument repeats the field name
2319
2320        /**
2321         * Constant for the "Kaithi" Unicode character block.
2322         * @since 1.7
2323         */
2324        public static final UnicodeBlock KAITHI =
2325            new UnicodeBlock("KAITHI");  // sole argument repeats the field name
2326
2327        /**
2328         * Constant for the "Cuneiform" Unicode character block.
2329         * @since 1.7
2330         */
2331        public static final UnicodeBlock CUNEIFORM =
2332            new UnicodeBlock("CUNEIFORM");  // sole argument repeats the field name
2333
2334        /**
2335         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2336         * character block.
2337         * @since 1.7
2338         */
2339        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2340            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2341                             "CUNEIFORM NUMBERS AND PUNCTUATION",
2342                             "CUNEIFORMNUMBERSANDPUNCTUATION");  // args: field name, spaced spelling, unspaced spelling
2343
2344        /**
2345         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2346         * @since 1.7
2347         */
2348        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2349            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2350                             "EGYPTIAN HIEROGLYPHS",
2351                             "EGYPTIANHIEROGLYPHS");  // args: field name, spaced spelling, unspaced spelling
2352
2353        /**
2354         * Constant for the "Bamum Supplement" Unicode character block.
2355         * @since 1.7
2356         */
2357        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2358            new UnicodeBlock("BAMUM_SUPPLEMENT",
2359                             "BAMUM SUPPLEMENT",
2360                             "BAMUMSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
2361
2362        /**
2363         * Constant for the "Kana Supplement" Unicode character block.
2364         * @since 1.7
2365         */
2366        public static final UnicodeBlock KANA_SUPPLEMENT =
2367            new UnicodeBlock("KANA_SUPPLEMENT",
2368                             "KANA SUPPLEMENT",
2369                             "KANASUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
2370
2371        /**
2372         * Constant for the "Ancient Greek Musical Notation" Unicode character
2373         * block.
2374         * @since 1.7
2375         */
2376        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2377            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2378                             "ANCIENT GREEK MUSICAL NOTATION",
2379                             "ANCIENTGREEKMUSICALNOTATION");  // args: field name, spaced spelling, unspaced spelling
2380
2381        /**
2382         * Constant for the "Counting Rod Numerals" Unicode character block.
2383         * @since 1.7
2384         */
2385        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2386            new UnicodeBlock("COUNTING_ROD_NUMERALS",
2387                             "COUNTING ROD NUMERALS",
2388                             "COUNTINGRODNUMERALS");  // args: field name, spaced spelling, unspaced spelling
2389
2390        /**
2391         * Constant for the "Mahjong Tiles" Unicode character block.
2392         * @since 1.7
2393         */
2394        public static final UnicodeBlock MAHJONG_TILES =
2395            new UnicodeBlock("MAHJONG_TILES",
2396                             "MAHJONG TILES",
2397                             "MAHJONGTILES");  // args: field name, spaced spelling, unspaced spelling
2398
2399        /**
2400         * Constant for the "Domino Tiles" Unicode character block.
2401         * @since 1.7
2402         */
2403        public static final UnicodeBlock DOMINO_TILES =
2404            new UnicodeBlock("DOMINO_TILES",
2405                             "DOMINO TILES",
2406                             "DOMINOTILES");  // args: field name, spaced spelling, unspaced spelling
2407
2408        /**
2409         * Constant for the "Playing Cards" Unicode character block.
2410         * @since 1.7
2411         */
2412        public static final UnicodeBlock PLAYING_CARDS =
2413            new UnicodeBlock("PLAYING_CARDS",
2414                             "PLAYING CARDS",
2415                             "PLAYINGCARDS");  // args: field name, spaced spelling, unspaced spelling
2416
2417        /**
2418         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2419         * block.
2420         * @since 1.7
2421         */
2422        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2423            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2424                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2425                             "ENCLOSEDALPHANUMERICSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
2426
2427        /**
2428         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2429         * block.
2430         * @since 1.7
2431         */
2432        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2433            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2434                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2435                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
2436
2437        /**
2438         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2439         * character block.
2440         * @since 1.7
2441         */
2442        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2443            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2444                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2445                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");  // args: field name, spaced spelling, unspaced spelling
2446
2447        /**
2448         * Constant for the "Emoticons" Unicode character block.
2449         * @since 1.7
2450         */
2451        public static final UnicodeBlock EMOTICONS =
2452            new UnicodeBlock("EMOTICONS");  // sole argument repeats the field name
2453
2454        /**
2455         * Constant for the "Transport And Map Symbols" Unicode character block.
2456         * @since 1.7
2457         */
2458        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2459            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2460                             "TRANSPORT AND MAP SYMBOLS",
2461                             "TRANSPORTANDMAPSYMBOLS");  // args: field name, spaced spelling, unspaced spelling
2462
2463        /**
2464         * Constant for the "Alchemical Symbols" Unicode character block.
2465         * @since 1.7
2466         */
2467        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2468            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2469                             "ALCHEMICAL SYMBOLS",
2470                             "ALCHEMICALSYMBOLS");  // args: field name, spaced spelling, unspaced spelling
2471
2472        /**
2473         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2474         * character block.
2475         * @since 1.7
2476         */
2477        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2478            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2479                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2480                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");  // args: field name, spaced spelling, unspaced spelling
2481
2482        /**
2483         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2484         * character block.
2485         * @since 1.7
2486         */
2487        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2488            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2489                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2490                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");  // args: field name, spaced spelling, unspaced spelling
2491
2492        /**
2493         * Constant for the "Arabic Extended-A" Unicode character block.
2494         * @since 1.8
2495         */
2496        public static final UnicodeBlock ARABIC_EXTENDED_A =
2497            new UnicodeBlock("ARABIC_EXTENDED_A",
2498                             "ARABIC EXTENDED-A",
2499                             "ARABICEXTENDED-A");  // args: field name, spaced spelling, unspaced spelling (hyphen kept in both)
2500
2501        /**
2502         * Constant for the "Sundanese Supplement" Unicode character block.
2503         * @since 1.8
2504         */
2505        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2506            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2507                             "SUNDANESE SUPPLEMENT",
2508                             "SUNDANESESUPPLEMENT");  // args: field name, spaced spelling, unspaced spelling
2509
2510        /**
2511         * Constant for the "Meetei Mayek Extensions" Unicode character block.
2512         * @since 1.8
2513         */
2514        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2515            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2516                             "MEETEI MAYEK EXTENSIONS",
2517                             "MEETEIMAYEKEXTENSIONS");
2518
2519        /**
2520         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2521         * @since 1.8
2522         */
2523        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2524            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2525                             "MEROITIC HIEROGLYPHS",
2526                             "MEROITICHIEROGLYPHS");
2527
2528        /**
2529         * Constant for the "Meroitic Cursive" Unicode character block.
2530         * @since 1.8
2531         */
2532        public static final UnicodeBlock MEROITIC_CURSIVE =
2533            new UnicodeBlock("MEROITIC_CURSIVE",
2534                             "MEROITIC CURSIVE",
2535                             "MEROITICCURSIVE");
2536
2537        /**
2538         * Constant for the "Sora Sompeng" Unicode character block.
2539         * @since 1.8
2540         */
2541        public static final UnicodeBlock SORA_SOMPENG =
2542            new UnicodeBlock("SORA_SOMPENG",
2543                             "SORA SOMPENG",
2544                             "SORASOMPENG");
2545
2546        /**
2547         * Constant for the "Chakma" Unicode character block.
2548         * @since 1.8
2549         */
2550        public static final UnicodeBlock CHAKMA =
2551            new UnicodeBlock("CHAKMA");
2552
2553        /**
2554         * Constant for the "Sharada" Unicode character block.
2555         * @since 1.8
2556         */
2557        public static final UnicodeBlock SHARADA =
2558            new UnicodeBlock("SHARADA");
2559
2560        /**
2561         * Constant for the "Takri" Unicode character block.
2562         * @since 1.8
2563         */
2564        public static final UnicodeBlock TAKRI =
2565            new UnicodeBlock("TAKRI");
2566
2567        /**
2568         * Constant for the "Miao" Unicode character block.
2569         * @since 1.8
2570         */
2571        public static final UnicodeBlock MIAO =
2572            new UnicodeBlock("MIAO");
2573
2574        /**
2575         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2576         * character block.
2577         * @since 1.8
2578         */
2579        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2580            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2581                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2582                             "ARABICMATHEMATICALALPHABETICSYMBOLS");
2583
        /**
         * Starting code points of consecutive code point ranges, listed in
         * ascending order. Entry {@code i} is paired with {@code blocks[i]}:
         * a range marked "unassigned" below has a {@code null} entry at the
         * same index in {@code blocks}. {@link #of(int)} binary-searches this
         * array for the last entry that is less than or equal to the
         * requested code point and uses the resulting index into
         * {@code blocks}, so the two arrays must stay the same length and in
         * the same order.
         */
        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   //             unassigned
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   //             unassigned
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   //             unassigned
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   //             unassigned
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   //             unassigned
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  //               unassigned
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  //               unassigned
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  //               unassigned
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  //               unassigned
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  //               unassigned
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  //               unassigned
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  //               unassigned
            0x11680,  // 11680..116CF; Takri
            0x116D0,  //               unassigned
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  //               unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  //               unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  //               unassigned
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
        };
2842
        /**
         * Blocks paired index-for-index with {@code blockStarts}: element
         * {@code i} is the block for the range that begins at
         * {@code blockStarts[i]}, or {@code null} where that range is
         * unassigned. {@link #of(int)} returns an element of this array
         * directly, so a {@code null} here is what callers see for a code
         * point that falls in an unassigned range. Any insertion or removal
         * must be mirrored at the same index in {@code blockStarts}.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,   // unassigned range starting at 0x0860
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,   // unassigned range starting at 0x1AB0
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,   // unassigned range starting at 0x1C80
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,   // unassigned range starting at 0x2FE0
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,   // unassigned range starting at 0xA9E0
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            null,   // unassigned range starting at 0xAB30
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,   // unassigned range starting at 0x10200
            LYCIAN,
            CARIAN,
            null,   // unassigned range starting at 0x102E0
            OLD_ITALIC,
            GOTHIC,
            null,   // unassigned range starting at 0x10350
            UGARITIC,
            OLD_PERSIAN,
            null,   // unassigned range starting at 0x103E0
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,   // unassigned range starting at 0x104B0
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,   // unassigned range starting at 0x10860
            PHOENICIAN,
            LYDIAN,
            null,   // unassigned range starting at 0x10940
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,   // unassigned range starting at 0x10A80
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,   // unassigned range starting at 0x10B80
            OLD_TURKIC,
            null,   // unassigned range starting at 0x10C50
            RUMI_NUMERAL_SYMBOLS,
            null,   // unassigned range starting at 0x10E80
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            null,   // unassigned range starting at 0x11150
            SHARADA,
            null,   // unassigned range starting at 0x111E0
            TAKRI,
            null,   // unassigned range starting at 0x116D0
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,   // unassigned range starting at 0x12480
            EGYPTIAN_HIEROGLYPHS,
            null,   // unassigned range starting at 0x13430
            BAMUM_SUPPLEMENT,
            null,   // unassigned range starting at 0x16A40
            MIAO,
            null,   // unassigned range starting at 0x16FA0
            KANA_SUPPLEMENT,
            null,   // unassigned range starting at 0x1B100
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,   // unassigned range starting at 0x1D250
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,   // unassigned range starting at 0x1D380
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,   // unassigned range starting at 0x1D800
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,   // unassigned range starting at 0x1EF00
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,   // unassigned range starting at 0x1F650
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            null,   // unassigned range starting at 0x1F780
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,   // unassigned range starting at 0x2A6E0
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            null,   // unassigned range starting at 0x2B820
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,   // unassigned range starting at 0x2FA20
            TAGS,
            null,   // unassigned range starting at 0xE0080
            VARIATION_SELECTORS_SUPPLEMENT,
            null,   // unassigned range starting at 0xE01F0
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
3101
3102
3103        /**
3104         * Returns the object representing the Unicode block containing the
3105         * given character, or {@code null} if the character is not a
3106         * member of a defined block.
3107         *
3108         * <p><b>Note:</b> This method cannot handle
3109         * <a href="Character.html#supplementary"> supplementary
3110         * characters</a>.  To support all Unicode characters, including
3111         * supplementary characters, use the {@link #of(int)} method.
3112         *
3113         * @param   c  The character in question
3114         * @return  The {@code UnicodeBlock} instance representing the
3115         *          Unicode block of which this character is a member, or
3116         *          {@code null} if the character is not a member of any
3117         *          Unicode block
3118         */
3119        public static UnicodeBlock of(char c) {
3120            return of((int)c);
3121        }
3122
3123        /**
3124         * Returns the object representing the Unicode block
3125         * containing the given character (Unicode code point), or
3126         * {@code null} if the character is not a member of a
3127         * defined block.
3128         *
3129         * @param   codePoint the character (Unicode code point) in question.
3130         * @return  The {@code UnicodeBlock} instance representing the
3131         *          Unicode block of which this character is a member, or
3132         *          {@code null} if the character is not a member of any
3133         *          Unicode block
3134         * @exception IllegalArgumentException if the specified
3135         * {@code codePoint} is an invalid Unicode code point.
3136         * @see Character#isValidCodePoint(int)
3137         * @since   1.5
3138         */
3139        public static UnicodeBlock of(int codePoint) {
3140            if (!isValidCodePoint(codePoint)) {
3141                throw new IllegalArgumentException();
3142            }
3143
3144            int top, bottom, current;
3145            bottom = 0;
3146            top = blockStarts.length;
3147            current = top/2;
3148
3149            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3150            while (top - bottom > 1) {
3151                if (codePoint >= blockStarts[current]) {
3152                    bottom = current;
3153                } else {
3154                    top = current;
3155                }
3156                current = (top + bottom) / 2;
3157            }
3158            return blocks[current];
3159        }
3160
3161        /**
3162         * Returns the UnicodeBlock with the given name. Block
3163         * names are determined by The Unicode Standard. The file
3164         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3165         * version of the standard. The {@link Character} class specifies
3166         * the version of the standard that it supports.
3167         * <p>
3168         * This method accepts block names in the following forms:
3169         * <ol>
3170         * <li> Canonical block names as defined by the Unicode Standard.
3171         * For example, the standard defines a "Basic Latin" block. Therefore, this
3172         * method accepts "Basic Latin" as a valid block name. The documentation of
3173         * each UnicodeBlock provides the canonical name.
3174         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3175         * is a valid block name for the "Basic Latin" block.
3176         * <li>The text representation of each constant UnicodeBlock identifier.
3177         * For example, this method will return the {@link #BASIC_LATIN} block if
3178         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3179         * hyphens in the canonical name with underscores.
3180         * </ol>
3181         * Finally, character case is ignored for all of the valid block name forms.
3182         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3183         * The en_US locale's case mapping rules are used to provide case-insensitive
3184         * string comparisons for block name validation.
3185         * <p>
3186         * If the Unicode Standard changes block names, both the previous and
3187         * current names will be accepted.
3188         *
3189         * @param blockName A {@code UnicodeBlock} name.
3190         * @return The {@code UnicodeBlock} instance identified
3191         *         by {@code blockName}
3192         * @throws IllegalArgumentException if {@code blockName} is an
3193         *         invalid name
3194         * @throws NullPointerException if {@code blockName} is null
3195         * @since 1.5
3196         */
3197        public static final UnicodeBlock forName(String blockName) {
3198            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3199            if (block == null) {
3200                throw new IllegalArgumentException();
3201            }
3202            return block;
3203        }
3204    }
3205
3206
3207    /**
3208     * A family of character subsets representing the character scripts
3209     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3210     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3211     * character is assigned to a single Unicode script, either a specific
3212     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3213     * one of the following three special values,
3214     * {@link Character.UnicodeScript#INHERITED Inherited},
3215     * {@link Character.UnicodeScript#COMMON Common} or
3216     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3217     *
3218     * @since 1.7
3219     */
3220    public static enum UnicodeScript {
3221        /**
3222         * Unicode script "Common".
3223         */
3224        COMMON,
3225
3226        /**
3227         * Unicode script "Latin".
3228         */
3229        LATIN,
3230
3231        /**
3232         * Unicode script "Greek".
3233         */
3234        GREEK,
3235
3236        /**
3237         * Unicode script "Cyrillic".
3238         */
3239        CYRILLIC,
3240
3241        /**
3242         * Unicode script "Armenian".
3243         */
3244        ARMENIAN,
3245
3246        /**
3247         * Unicode script "Hebrew".
3248         */
3249        HEBREW,
3250
3251        /**
3252         * Unicode script "Arabic".
3253         */
3254        ARABIC,
3255
3256        /**
3257         * Unicode script "Syriac".
3258         */
3259        SYRIAC,
3260
3261        /**
3262         * Unicode script "Thaana".
3263         */
3264        THAANA,
3265
3266        /**
3267         * Unicode script "Devanagari".
3268         */
3269        DEVANAGARI,
3270
3271        /**
3272         * Unicode script "Bengali".
3273         */
3274        BENGALI,
3275
3276        /**
3277         * Unicode script "Gurmukhi".
3278         */
3279        GURMUKHI,
3280
3281        /**
3282         * Unicode script "Gujarati".
3283         */
3284        GUJARATI,
3285
3286        /**
3287         * Unicode script "Oriya".
3288         */
3289        ORIYA,
3290
3291        /**
3292         * Unicode script "Tamil".
3293         */
3294        TAMIL,
3295
3296        /**
3297         * Unicode script "Telugu".
3298         */
3299        TELUGU,
3300
3301        /**
3302         * Unicode script "Kannada".
3303         */
3304        KANNADA,
3305
3306        /**
3307         * Unicode script "Malayalam".
3308         */
3309        MALAYALAM,
3310
3311        /**
3312         * Unicode script "Sinhala".
3313         */
3314        SINHALA,
3315
3316        /**
3317         * Unicode script "Thai".
3318         */
3319        THAI,
3320
3321        /**
3322         * Unicode script "Lao".
3323         */
3324        LAO,
3325
3326        /**
3327         * Unicode script "Tibetan".
3328         */
3329        TIBETAN,
3330
3331        /**
3332         * Unicode script "Myanmar".
3333         */
3334        MYANMAR,
3335
3336        /**
3337         * Unicode script "Georgian".
3338         */
3339        GEORGIAN,
3340
3341        /**
3342         * Unicode script "Hangul".
3343         */
3344        HANGUL,
3345
3346        /**
3347         * Unicode script "Ethiopic".
3348         */
3349        ETHIOPIC,
3350
3351        /**
3352         * Unicode script "Cherokee".
3353         */
3354        CHEROKEE,
3355
3356        /**
3357         * Unicode script "Canadian_Aboriginal".
3358         */
3359        CANADIAN_ABORIGINAL,
3360
3361        /**
3362         * Unicode script "Ogham".
3363         */
3364        OGHAM,
3365
3366        /**
3367         * Unicode script "Runic".
3368         */
3369        RUNIC,
3370
3371        /**
3372         * Unicode script "Khmer".
3373         */
3374        KHMER,
3375
3376        /**
3377         * Unicode script "Mongolian".
3378         */
3379        MONGOLIAN,
3380
3381        /**
3382         * Unicode script "Hiragana".
3383         */
3384        HIRAGANA,
3385
3386        /**
3387         * Unicode script "Katakana".
3388         */
3389        KATAKANA,
3390
3391        /**
3392         * Unicode script "Bopomofo".
3393         */
3394        BOPOMOFO,
3395
3396        /**
3397         * Unicode script "Han".
3398         */
3399        HAN,
3400
3401        /**
3402         * Unicode script "Yi".
3403         */
3404        YI,
3405
3406        /**
3407         * Unicode script "Old_Italic".
3408         */
3409        OLD_ITALIC,
3410
3411        /**
3412         * Unicode script "Gothic".
3413         */
3414        GOTHIC,
3415
3416        /**
3417         * Unicode script "Deseret".
3418         */
3419        DESERET,
3420
3421        /**
3422         * Unicode script "Inherited".
3423         */
3424        INHERITED,
3425
3426        /**
3427         * Unicode script "Tagalog".
3428         */
3429        TAGALOG,
3430
3431        /**
3432         * Unicode script "Hanunoo".
3433         */
3434        HANUNOO,
3435
3436        /**
3437         * Unicode script "Buhid".
3438         */
3439        BUHID,
3440
3441        /**
3442         * Unicode script "Tagbanwa".
3443         */
3444        TAGBANWA,
3445
3446        /**
3447         * Unicode script "Limbu".
3448         */
3449        LIMBU,
3450
3451        /**
3452         * Unicode script "Tai_Le".
3453         */
3454        TAI_LE,
3455
3456        /**
3457         * Unicode script "Linear_B".
3458         */
3459        LINEAR_B,
3460
3461        /**
3462         * Unicode script "Ugaritic".
3463         */
3464        UGARITIC,
3465
3466        /**
3467         * Unicode script "Shavian".
3468         */
3469        SHAVIAN,
3470
3471        /**
3472         * Unicode script "Osmanya".
3473         */
3474        OSMANYA,
3475
3476        /**
3477         * Unicode script "Cypriot".
3478         */
3479        CYPRIOT,
3480
3481        /**
3482         * Unicode script "Braille".
3483         */
3484        BRAILLE,
3485
3486        /**
3487         * Unicode script "Buginese".
3488         */
3489        BUGINESE,
3490
3491        /**
3492         * Unicode script "Coptic".
3493         */
3494        COPTIC,
3495
3496        /**
3497         * Unicode script "New_Tai_Lue".
3498         */
3499        NEW_TAI_LUE,
3500
3501        /**
3502         * Unicode script "Glagolitic".
3503         */
3504        GLAGOLITIC,
3505
3506        /**
3507         * Unicode script "Tifinagh".
3508         */
3509        TIFINAGH,
3510
3511        /**
3512         * Unicode script "Syloti_Nagri".
3513         */
3514        SYLOTI_NAGRI,
3515
3516        /**
3517         * Unicode script "Old_Persian".
3518         */
3519        OLD_PERSIAN,
3520
3521        /**
3522         * Unicode script "Kharoshthi".
3523         */
3524        KHAROSHTHI,
3525
3526        /**
3527         * Unicode script "Balinese".
3528         */
3529        BALINESE,
3530
3531        /**
3532         * Unicode script "Cuneiform".
3533         */
3534        CUNEIFORM,
3535
3536        /**
3537         * Unicode script "Phoenician".
3538         */
3539        PHOENICIAN,
3540
3541        /**
3542         * Unicode script "Phags_Pa".
3543         */
3544        PHAGS_PA,
3545
3546        /**
3547         * Unicode script "Nko".
3548         */
3549        NKO,
3550
3551        /**
3552         * Unicode script "Sundanese".
3553         */
3554        SUNDANESE,
3555
3556        /**
3557         * Unicode script "Batak".
3558         */
3559        BATAK,
3560
3561        /**
3562         * Unicode script "Lepcha".
3563         */
3564        LEPCHA,
3565
3566        /**
3567         * Unicode script "Ol_Chiki".
3568         */
3569        OL_CHIKI,
3570
3571        /**
3572         * Unicode script "Vai".
3573         */
3574        VAI,
3575
3576        /**
3577         * Unicode script "Saurashtra".
3578         */
3579        SAURASHTRA,
3580
3581        /**
3582         * Unicode script "Kayah_Li".
3583         */
3584        KAYAH_LI,
3585
3586        /**
3587         * Unicode script "Rejang".
3588         */
3589        REJANG,
3590
3591        /**
3592         * Unicode script "Lycian".
3593         */
3594        LYCIAN,
3595
3596        /**
3597         * Unicode script "Carian".
3598         */
3599        CARIAN,
3600
3601        /**
3602         * Unicode script "Lydian".
3603         */
3604        LYDIAN,
3605
3606        /**
3607         * Unicode script "Cham".
3608         */
3609        CHAM,
3610
3611        /**
3612         * Unicode script "Tai_Tham".
3613         */
3614        TAI_THAM,
3615
3616        /**
3617         * Unicode script "Tai_Viet".
3618         */
3619        TAI_VIET,
3620
3621        /**
3622         * Unicode script "Avestan".
3623         */
3624        AVESTAN,
3625
3626        /**
3627         * Unicode script "Egyptian_Hieroglyphs".
3628         */
3629        EGYPTIAN_HIEROGLYPHS,
3630
3631        /**
3632         * Unicode script "Samaritan".
3633         */
3634        SAMARITAN,
3635
3636        /**
3637         * Unicode script "Mandaic".
3638         */
3639        MANDAIC,
3640
3641        /**
3642         * Unicode script "Lisu".
3643         */
3644        LISU,
3645
3646        /**
3647         * Unicode script "Bamum".
3648         */
3649        BAMUM,
3650
3651        /**
3652         * Unicode script "Javanese".
3653         */
3654        JAVANESE,
3655
3656        /**
3657         * Unicode script "Meetei_Mayek".
3658         */
3659        MEETEI_MAYEK,
3660
3661        /**
3662         * Unicode script "Imperial_Aramaic".
3663         */
3664        IMPERIAL_ARAMAIC,
3665
3666        /**
3667         * Unicode script "Old_South_Arabian".
3668         */
3669        OLD_SOUTH_ARABIAN,
3670
3671        /**
3672         * Unicode script "Inscriptional_Parthian".
3673         */
3674        INSCRIPTIONAL_PARTHIAN,
3675
3676        /**
3677         * Unicode script "Inscriptional_Pahlavi".
3678         */
3679        INSCRIPTIONAL_PAHLAVI,
3680
3681        /**
3682         * Unicode script "Old_Turkic".
3683         */
3684        OLD_TURKIC,
3685
3686        /**
3687         * Unicode script "Brahmi".
3688         */
3689        BRAHMI,
3690
3691        /**
3692         * Unicode script "Kaithi".
3693         */
3694        KAITHI,
3695
3696        /**
3697         * Unicode script "Meroitic_Hieroglyphs".
3698         */
3699        MEROITIC_HIEROGLYPHS,
3700
3701        /**
3702         * Unicode script "Meroitic_Cursive".
3703         */
3704        MEROITIC_CURSIVE,
3705
3706        /**
3707         * Unicode script "Sora_Sompeng".
3708         */
3709        SORA_SOMPENG,
3710
3711        /**
3712         * Unicode script "Chakma".
3713         */
3714        CHAKMA,
3715
3716        /**
3717         * Unicode script "Sharada".
3718         */
3719        SHARADA,
3720
3721        /**
3722         * Unicode script "Takri".
3723         */
3724        TAKRI,
3725
3726        /**
3727         * Unicode script "Miao".
3728         */
3729        MIAO,
3730
3731        /**
3732         * Unicode script "Unknown".
3733         */
3734        UNKNOWN;
3735
3736        private static final int[] scriptStarts = {
3737            0x0000,   // 0000..0040; COMMON
3738            0x0041,   // 0041..005A; LATIN
3739            0x005B,   // 005B..0060; COMMON
3740            0x0061,   // 0061..007A; LATIN
3741            0x007B,   // 007B..00A9; COMMON
3742            0x00AA,   // 00AA..00AA; LATIN
3743            0x00AB,   // 00AB..00B9; COMMON
3744            0x00BA,   // 00BA..00BA; LATIN
3745            0x00BB,   // 00BB..00BF; COMMON
3746            0x00C0,   // 00C0..00D6; LATIN
3747            0x00D7,   // 00D7..00D7; COMMON
3748            0x00D8,   // 00D8..00F6; LATIN
3749            0x00F7,   // 00F7..00F7; COMMON
3750            0x00F8,   // 00F8..02B8; LATIN
3751            0x02B9,   // 02B9..02DF; COMMON
3752            0x02E0,   // 02E0..02E4; LATIN
3753            0x02E5,   // 02E5..02E9; COMMON
3754            0x02EA,   // 02EA..02EB; BOPOMOFO
3755            0x02EC,   // 02EC..02FF; COMMON
3756            0x0300,   // 0300..036F; INHERITED
3757            0x0370,   // 0370..0373; GREEK
3758            0x0374,   // 0374..0374; COMMON
3759            0x0375,   // 0375..037D; GREEK
3760            0x037E,   // 037E..0383; COMMON
3761            0x0384,   // 0384..0384; GREEK
3762            0x0385,   // 0385..0385; COMMON
3763            0x0386,   // 0386..0386; GREEK
3764            0x0387,   // 0387..0387; COMMON
3765            0x0388,   // 0388..03E1; GREEK
3766            0x03E2,   // 03E2..03EF; COPTIC
3767            0x03F0,   // 03F0..03FF; GREEK
3768            0x0400,   // 0400..0484; CYRILLIC
3769            0x0485,   // 0485..0486; INHERITED
3770            0x0487,   // 0487..0530; CYRILLIC
3771            0x0531,   // 0531..0588; ARMENIAN
3772            0x0589,   // 0589..0589; COMMON
3773            0x058A,   // 058A..0590; ARMENIAN
3774            0x0591,   // 0591..05FF; HEBREW
3775            0x0600,   // 0600..060B; ARABIC
3776            0x060C,   // 060C..060C; COMMON
3777            0x060D,   // 060D..061A; ARABIC
3778            0x061B,   // 061B..061D; COMMON
3779            0x061E,   // 061E..061E; ARABIC
3780            0x061F,   // 061F..061F; COMMON
3781            0x0620,   // 0620..063F; ARABIC
3782            0x0640,   // 0640..0640; COMMON
3783            0x0641,   // 0641..064A; ARABIC
3784            0x064B,   // 064B..0655; INHERITED
3785            0x0656,   // 0656..065F; ARABIC
3786            0x0660,   // 0660..0669; COMMON
3787            0x066A,   // 066A..066F; ARABIC
3788            0x0670,   // 0670..0670; INHERITED
3789            0x0671,   // 0671..06DC; ARABIC
3790            0x06DD,   // 06DD..06DD; COMMON
3791            0x06DE,   // 06DE..06FF; ARABIC
3792            0x0700,   // 0700..074F; SYRIAC
3793            0x0750,   // 0750..077F; ARABIC
3794            0x0780,   // 0780..07BF; THAANA
3795            0x07C0,   // 07C0..07FF; NKO
3796            0x0800,   // 0800..083F; SAMARITAN
3797            0x0840,   // 0840..089F; MANDAIC
3798            0x08A0,   // 08A0..08FF; ARABIC
3799            0x0900,   // 0900..0950; DEVANAGARI
3800            0x0951,   // 0951..0952; INHERITED
3801            0x0953,   // 0953..0963; DEVANAGARI
3802            0x0964,   // 0964..0965; COMMON
3803            0x0966,   // 0966..0980; DEVANAGARI
3804            0x0981,   // 0981..0A00; BENGALI
3805            0x0A01,   // 0A01..0A80; GURMUKHI
3806            0x0A81,   // 0A81..0B00; GUJARATI
3807            0x0B01,   // 0B01..0B81; ORIYA
3808            0x0B82,   // 0B82..0C00; TAMIL
3809            0x0C01,   // 0C01..0C81; TELUGU
3810            0x0C82,   // 0C82..0CF0; KANNADA
3811            0x0D02,   // 0D02..0D81; MALAYALAM
3812            0x0D82,   // 0D82..0E00; SINHALA
3813            0x0E01,   // 0E01..0E3E; THAI
3814            0x0E3F,   // 0E3F..0E3F; COMMON
3815            0x0E40,   // 0E40..0E80; THAI
3816            0x0E81,   // 0E81..0EFF; LAO
3817            0x0F00,   // 0F00..0FD4; TIBETAN
3818            0x0FD5,   // 0FD5..0FD8; COMMON
3819            0x0FD9,   // 0FD9..0FFF; TIBETAN
3820            0x1000,   // 1000..109F; MYANMAR
3821            0x10A0,   // 10A0..10FA; GEORGIAN
3822            0x10FB,   // 10FB..10FB; COMMON
3823            0x10FC,   // 10FC..10FF; GEORGIAN
3824            0x1100,   // 1100..11FF; HANGUL
3825            0x1200,   // 1200..139F; ETHIOPIC
3826            0x13A0,   // 13A0..13FF; CHEROKEE
3827            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3828            0x1680,   // 1680..169F; OGHAM
3829            0x16A0,   // 16A0..16EA; RUNIC
3830            0x16EB,   // 16EB..16ED; COMMON
3831            0x16EE,   // 16EE..16FF; RUNIC
3832            0x1700,   // 1700..171F; TAGALOG
3833            0x1720,   // 1720..1734; HANUNOO
3834            0x1735,   // 1735..173F; COMMON
3835            0x1740,   // 1740..175F; BUHID
3836            0x1760,   // 1760..177F; TAGBANWA
3837            0x1780,   // 1780..17FF; KHMER
3838            0x1800,   // 1800..1801; MONGOLIAN
3839            0x1802,   // 1802..1803; COMMON
3840            0x1804,   // 1804..1804; MONGOLIAN
3841            0x1805,   // 1805..1805; COMMON
3842            0x1806,   // 1806..18AF; MONGOLIAN
3843            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3844            0x1900,   // 1900..194F; LIMBU
3845            0x1950,   // 1950..197F; TAI_LE
3846            0x1980,   // 1980..19DF; NEW_TAI_LUE
3847            0x19E0,   // 19E0..19FF; KHMER
3848            0x1A00,   // 1A00..1A1F; BUGINESE
3849            0x1A20,   // 1A20..1AFF; TAI_THAM
3850            0x1B00,   // 1B00..1B7F; BALINESE
3851            0x1B80,   // 1B80..1BBF; SUNDANESE
3852            0x1BC0,   // 1BC0..1BFF; BATAK
3853            0x1C00,   // 1C00..1C4F; LEPCHA
3854            0x1C50,   // 1C50..1CBF; OL_CHIKI
3855            0x1CC0,   // 1CC0..1CCF; SUNDANESE
3856            0x1CD0,   // 1CD0..1CD2; INHERITED
3857            0x1CD3,   // 1CD3..1CD3; COMMON
3858            0x1CD4,   // 1CD4..1CE0; INHERITED
3859            0x1CE1,   // 1CE1..1CE1; COMMON
3860            0x1CE2,   // 1CE2..1CE8; INHERITED
3861            0x1CE9,   // 1CE9..1CEC; COMMON
3862            0x1CED,   // 1CED..1CED; INHERITED
3863            0x1CEE,   // 1CEE..1CF3; COMMON
3864            0x1CF4,   // 1CF4..1CF4; INHERITED
3865            0x1CF5,   // 1CF5..1CFF; COMMON
3866            0x1D00,   // 1D00..1D25; LATIN
3867            0x1D26,   // 1D26..1D2A; GREEK
3868            0x1D2B,   // 1D2B..1D2B; CYRILLIC
3869            0x1D2C,   // 1D2C..1D5C; LATIN
3870            0x1D5D,   // 1D5D..1D61; GREEK
3871            0x1D62,   // 1D62..1D65; LATIN
3872            0x1D66,   // 1D66..1D6A; GREEK
3873            0x1D6B,   // 1D6B..1D77; LATIN
3874            0x1D78,   // 1D78..1D78; CYRILLIC
3875            0x1D79,   // 1D79..1DBE; LATIN
3876            0x1DBF,   // 1DBF..1DBF; GREEK
3877            0x1DC0,   // 1DC0..1DFF; INHERITED
3878            0x1E00,   // 1E00..1EFF; LATIN
3879            0x1F00,   // 1F00..1FFF; GREEK
3880            0x2000,   // 2000..200B; COMMON
3881            0x200C,   // 200C..200D; INHERITED
3882            0x200E,   // 200E..2070; COMMON
3883            0x2071,   // 2071..2073; LATIN
3884            0x2074,   // 2074..207E; COMMON
3885            0x207F,   // 207F..207F; LATIN
3886            0x2080,   // 2080..208F; COMMON
3887            0x2090,   // 2090..209F; LATIN
3888            0x20A0,   // 20A0..20CF; COMMON
3889            0x20D0,   // 20D0..20FF; INHERITED
3890            0x2100,   // 2100..2125; COMMON
3891            0x2126,   // 2126..2126; GREEK
3892            0x2127,   // 2127..2129; COMMON
3893            0x212A,   // 212A..212B; LATIN
3894            0x212C,   // 212C..2131; COMMON
3895            0x2132,   // 2132..2132; LATIN
3896            0x2133,   // 2133..214D; COMMON
3897            0x214E,   // 214E..214E; LATIN
3898            0x214F,   // 214F..215F; COMMON
3899            0x2160,   // 2160..2188; LATIN
3900            0x2189,   // 2189..27FF; COMMON
3901            0x2800,   // 2800..28FF; BRAILLE
3902            0x2900,   // 2900..2BFF; COMMON
3903            0x2C00,   // 2C00..2C5F; GLAGOLITIC
3904            0x2C60,   // 2C60..2C7F; LATIN
3905            0x2C80,   // 2C80..2CFF; COPTIC
3906            0x2D00,   // 2D00..2D2F; GEORGIAN
3907            0x2D30,   // 2D30..2D7F; TIFINAGH
3908            0x2D80,   // 2D80..2DDF; ETHIOPIC
3909            0x2DE0,   // 2DE0..2DFF; CYRILLIC
3910            0x2E00,   // 2E00..2E7F; COMMON
3911            0x2E80,   // 2E80..2FEF; HAN
3912            0x2FF0,   // 2FF0..3004; COMMON
3913            0x3005,   // 3005..3005; HAN
3914            0x3006,   // 3006..3006; COMMON
3915            0x3007,   // 3007..3007; HAN
3916            0x3008,   // 3008..3020; COMMON
3917            0x3021,   // 3021..3029; HAN
3918            0x302A,   // 302A..302D; INHERITED
3919            0x302E,   // 302E..302F; HANGUL
3920            0x3030,   // 3030..3037; COMMON
3921            0x3038,   // 3038..303B; HAN
3922            0x303C,   // 303C..3040; COMMON
3923            0x3041,   // 3041..3098; HIRAGANA
3924            0x3099,   // 3099..309A; INHERITED
3925            0x309B,   // 309B..309C; COMMON
3926            0x309D,   // 309D..309F; HIRAGANA
3927            0x30A0,   // 30A0..30A0; COMMON
3928            0x30A1,   // 30A1..30FA; KATAKANA
3929            0x30FB,   // 30FB..30FC; COMMON
3930            0x30FD,   // 30FD..3104; KATAKANA
3931            0x3105,   // 3105..3130; BOPOMOFO
3932            0x3131,   // 3131..318F; HANGUL
3933            0x3190,   // 3190..319F; COMMON
3934            0x31A0,   // 31A0..31BF; BOPOMOFO
3935            0x31C0,   // 31C0..31EF; COMMON
3936            0x31F0,   // 31F0..31FF; KATAKANA
3937            0x3200,   // 3200..321F; HANGUL
3938            0x3220,   // 3220..325F; COMMON
3939            0x3260,   // 3260..327E; HANGUL
3940            0x327F,   // 327F..32CF; COMMON
3941            0x32D0,   // 32D0..3357; KATAKANA
3942            0x3358,   // 3358..33FF; COMMON
3943            0x3400,   // 3400..4DBF; HAN
3944            0x4DC0,   // 4DC0..4DFF; COMMON
3945            0x4E00,   // 4E00..9FFF; HAN
3946            0xA000,   // A000..A4CF; YI
3947            0xA4D0,   // A4D0..A4FF; LISU
3948            0xA500,   // A500..A63F; VAI
3949            0xA640,   // A640..A69F; CYRILLIC
3950            0xA6A0,   // A6A0..A6FF; BAMUM
3951            0xA700,   // A700..A721; COMMON
3952            0xA722,   // A722..A787; LATIN
3953            0xA788,   // A788..A78A; COMMON
3954            0xA78B,   // A78B..A7FF; LATIN
3955            0xA800,   // A800..A82F; SYLOTI_NAGRI
3956            0xA830,   // A830..A83F; COMMON
3957            0xA840,   // A840..A87F; PHAGS_PA
3958            0xA880,   // A880..A8DF; SAURASHTRA
3959            0xA8E0,   // A8E0..A8FF; DEVANAGARI
3960            0xA900,   // A900..A92F; KAYAH_LI
3961            0xA930,   // A930..A95F; REJANG
3962            0xA960,   // A960..A97F; HANGUL
3963            0xA980,   // A980..A9FF; JAVANESE
3964            0xAA00,   // AA00..AA5F; CHAM
3965            0xAA60,   // AA60..AA7F; MYANMAR
3966            0xAA80,   // AA80..AADF; TAI_VIET
3967            0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3968            0xAB01,   // AB01..ABBF; ETHIOPIC
3969            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3970            0xAC00,   // AC00..D7FB; HANGUL
3971            0xD7FC,   // D7FC..F8FF; UNKNOWN
3972            0xF900,   // F900..FAFF; HAN
3973            0xFB00,   // FB00..FB12; LATIN
3974            0xFB13,   // FB13..FB1C; ARMENIAN
3975            0xFB1D,   // FB1D..FB4F; HEBREW
3976            0xFB50,   // FB50..FD3D; ARABIC
3977            0xFD3E,   // FD3E..FD4F; COMMON
3978            0xFD50,   // FD50..FDFC; ARABIC
3979            0xFDFD,   // FDFD..FDFF; COMMON
3980            0xFE00,   // FE00..FE0F; INHERITED
3981            0xFE10,   // FE10..FE1F; COMMON
3982            0xFE20,   // FE20..FE2F; INHERITED
3983            0xFE30,   // FE30..FE6F; COMMON
3984            0xFE70,   // FE70..FEFE; ARABIC
3985            0xFEFF,   // FEFF..FF20; COMMON
3986            0xFF21,   // FF21..FF3A; LATIN
3987            0xFF3B,   // FF3B..FF40; COMMON
3988            0xFF41,   // FF41..FF5A; LATIN
3989            0xFF5B,   // FF5B..FF65; COMMON
3990            0xFF66,   // FF66..FF6F; KATAKANA
3991            0xFF70,   // FF70..FF70; COMMON
3992            0xFF71,   // FF71..FF9D; KATAKANA
3993            0xFF9E,   // FF9E..FF9F; COMMON
3994            0xFFA0,   // FFA0..FFDF; HANGUL
3995            0xFFE0,   // FFE0..FFFF; COMMON
3996            0x10000,  // 10000..100FF; LINEAR_B
3997            0x10100,  // 10100..1013F; COMMON
3998            0x10140,  // 10140..1018F; GREEK
3999            0x10190,  // 10190..101FC; COMMON
4000            0x101FD,  // 101FD..1027F; INHERITED
4001            0x10280,  // 10280..1029F; LYCIAN
4002            0x102A0,  // 102A0..102FF; CARIAN
4003            0x10300,  // 10300..1032F; OLD_ITALIC
4004            0x10330,  // 10330..1037F; GOTHIC
4005            0x10380,  // 10380..1039F; UGARITIC
4006            0x103A0,  // 103A0..103FF; OLD_PERSIAN
4007            0x10400,  // 10400..1044F; DESERET
4008            0x10450,  // 10450..1047F; SHAVIAN
4009            0x10480,  // 10480..107FF; OSMANYA
4010            0x10800,  // 10800..1083F; CYPRIOT
4011            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4012            0x10900,  // 10900..1091F; PHOENICIAN
4013            0x10920,  // 10920..1097F; LYDIAN
4014            0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4015            0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4016            0x10A00,  // 10A00..10A5F; KHAROSHTHI
4017            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4018            0x10B00,  // 10B00..10B3F; AVESTAN
4019            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4020            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4021            0x10C00,  // 10C00..10E5F; OLD_TURKIC
4022            0x10E60,  // 10E60..10FFF; ARABIC
4023            0x11000,  // 11000..1107F; BRAHMI
4024            0x11080,  // 11080..110CF; KAITHI
4025            0x110D0,  // 110D0..110FF; SORA_SOMPENG
4026            0x11100,  // 11100..1117F; CHAKMA
4027            0x11180,  // 11180..1167F; SHARADA
4028            0x11680,  // 11680..116CF; TAKRI
4029            0x12000,  // 12000..12FFF; CUNEIFORM
4030            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4031            0x16800,  // 16800..16A38; BAMUM
4032            0x16F00,  // 16F00..16F9F; MIAO
4033            0x1B000,  // 1B000..1B000; KATAKANA
4034            0x1B001,  // 1B001..1CFFF; HIRAGANA
4035            0x1D000,  // 1D000..1D166; COMMON
4036            0x1D167,  // 1D167..1D169; INHERITED
4037            0x1D16A,  // 1D16A..1D17A; COMMON
4038            0x1D17B,  // 1D17B..1D182; INHERITED
4039            0x1D183,  // 1D183..1D184; COMMON
4040            0x1D185,  // 1D185..1D18B; INHERITED
4041            0x1D18C,  // 1D18C..1D1A9; COMMON
4042            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4043            0x1D1AE,  // 1D1AE..1D1FF; COMMON
4044            0x1D200,  // 1D200..1D2FF; GREEK
4045            0x1D300,  // 1D300..1EDFF; COMMON
4046            0x1EE00,  // 1EE00..1EFFF; ARABIC
4047            0x1F000,  // 1F000..1F1FF; COMMON
4048            0x1F200,  // 1F200..1F200; HIRAGANA
4049            0x1F201,  // 1F201..1FFFF; COMMON
4050            0x20000,  // 20000..E0000; HAN
4051            0xE0001,  // E0001..E00FF; COMMON
4052            0xE0100,  // E0100..E01EF; INHERITED
4053            0xE01F0   // E01F0..10FFFF; UNKNOWN
4054
4055        };
4056
4057        private static final UnicodeScript[] scripts = {
4058            COMMON,
4059            LATIN,
4060            COMMON,
4061            LATIN,
4062            COMMON,
4063            LATIN,
4064            COMMON,
4065            LATIN,
4066            COMMON,
4067            LATIN,
4068            COMMON,
4069            LATIN,
4070            COMMON,
4071            LATIN,
4072            COMMON,
4073            LATIN,
4074            COMMON,
4075            BOPOMOFO,
4076            COMMON,
4077            INHERITED,
4078            GREEK,
4079            COMMON,
4080            GREEK,
4081            COMMON,
4082            GREEK,
4083            COMMON,
4084            GREEK,
4085            COMMON,
4086            GREEK,
4087            COPTIC,
4088            GREEK,
4089            CYRILLIC,
4090            INHERITED,
4091            CYRILLIC,
4092            ARMENIAN,
4093            COMMON,
4094            ARMENIAN,
4095            HEBREW,
4096            ARABIC,
4097            COMMON,
4098            ARABIC,
4099            COMMON,
4100            ARABIC,
4101            COMMON,
4102            ARABIC,
4103            COMMON,
4104            ARABIC,
4105            INHERITED,
4106            ARABIC,
4107            COMMON,
4108            ARABIC,
4109            INHERITED,
4110            ARABIC,
4111            COMMON,
4112            ARABIC,
4113            SYRIAC,
4114            ARABIC,
4115            THAANA,
4116            NKO,
4117            SAMARITAN,
4118            MANDAIC,
4119            ARABIC,
4120            DEVANAGARI,
4121            INHERITED,
4122            DEVANAGARI,
4123            COMMON,
4124            DEVANAGARI,
4125            BENGALI,
4126            GURMUKHI,
4127            GUJARATI,
4128            ORIYA,
4129            TAMIL,
4130            TELUGU,
4131            KANNADA,
4132            MALAYALAM,
4133            SINHALA,
4134            THAI,
4135            COMMON,
4136            THAI,
4137            LAO,
4138            TIBETAN,
4139            COMMON,
4140            TIBETAN,
4141            MYANMAR,
4142            GEORGIAN,
4143            COMMON,
4144            GEORGIAN,
4145            HANGUL,
4146            ETHIOPIC,
4147            CHEROKEE,
4148            CANADIAN_ABORIGINAL,
4149            OGHAM,
4150            RUNIC,
4151            COMMON,
4152            RUNIC,
4153            TAGALOG,
4154            HANUNOO,
4155            COMMON,
4156            BUHID,
4157            TAGBANWA,
4158            KHMER,
4159            MONGOLIAN,
4160            COMMON,
4161            MONGOLIAN,
4162            COMMON,
4163            MONGOLIAN,
4164            CANADIAN_ABORIGINAL,
4165            LIMBU,
4166            TAI_LE,
4167            NEW_TAI_LUE,
4168            KHMER,
4169            BUGINESE,
4170            TAI_THAM,
4171            BALINESE,
4172            SUNDANESE,
4173            BATAK,
4174            LEPCHA,
4175            OL_CHIKI,
4176            SUNDANESE,
4177            INHERITED,
4178            COMMON,
4179            INHERITED,
4180            COMMON,
4181            INHERITED,
4182            COMMON,
4183            INHERITED,
4184            COMMON,
4185            INHERITED,
4186            COMMON,
4187            LATIN,
4188            GREEK,
4189            CYRILLIC,
4190            LATIN,
4191            GREEK,
4192            LATIN,
4193            GREEK,
4194            LATIN,
4195            CYRILLIC,
4196            LATIN,
4197            GREEK,
4198            INHERITED,
4199            LATIN,
4200            GREEK,
4201            COMMON,
4202            INHERITED,
4203            COMMON,
4204            LATIN,
4205            COMMON,
4206            LATIN,
4207            COMMON,
4208            LATIN,
4209            COMMON,
4210            INHERITED,
4211            COMMON,
4212            GREEK,
4213            COMMON,
4214            LATIN,
4215            COMMON,
4216            LATIN,
4217            COMMON,
4218            LATIN,
4219            COMMON,
4220            LATIN,
4221            COMMON,
4222            BRAILLE,
4223            COMMON,
4224            GLAGOLITIC,
4225            LATIN,
4226            COPTIC,
4227            GEORGIAN,
4228            TIFINAGH,
4229            ETHIOPIC,
4230            CYRILLIC,
4231            COMMON,
4232            HAN,
4233            COMMON,
4234            HAN,
4235            COMMON,
4236            HAN,
4237            COMMON,
4238            HAN,
4239            INHERITED,
4240            HANGUL,
4241            COMMON,
4242            HAN,
4243            COMMON,
4244            HIRAGANA,
4245            INHERITED,
4246            COMMON,
4247            HIRAGANA,
4248            COMMON,
4249            KATAKANA,
4250            COMMON,
4251            KATAKANA,
4252            BOPOMOFO,
4253            HANGUL,
4254            COMMON,
4255            BOPOMOFO,
4256            COMMON,
4257            KATAKANA,
4258            HANGUL,
4259            COMMON,
4260            HANGUL,
4261            COMMON,
4262            KATAKANA,
4263            COMMON,
4264            HAN,
4265            COMMON,
4266            HAN,
4267            YI,
4268            LISU,
4269            VAI,
4270            CYRILLIC,
4271            BAMUM,
4272            COMMON,
4273            LATIN,
4274            COMMON,
4275            LATIN,
4276            SYLOTI_NAGRI,
4277            COMMON,
4278            PHAGS_PA,
4279            SAURASHTRA,
4280            DEVANAGARI,
4281            KAYAH_LI,
4282            REJANG,
4283            HANGUL,
4284            JAVANESE,
4285            CHAM,
4286            MYANMAR,
4287            TAI_VIET,
4288            MEETEI_MAYEK,
4289            ETHIOPIC,
4290            MEETEI_MAYEK,
4291            HANGUL,
4292            UNKNOWN     ,
4293            HAN,
4294            LATIN,
4295            ARMENIAN,
4296            HEBREW,
4297            ARABIC,
4298            COMMON,
4299            ARABIC,
4300            COMMON,
4301            INHERITED,
4302            COMMON,
4303            INHERITED,
4304            COMMON,
4305            ARABIC,
4306            COMMON,
4307            LATIN,
4308            COMMON,
4309            LATIN,
4310            COMMON,
4311            KATAKANA,
4312            COMMON,
4313            KATAKANA,
4314            COMMON,
4315            HANGUL,
4316            COMMON,
4317            LINEAR_B,
4318            COMMON,
4319            GREEK,
4320            COMMON,
4321            INHERITED,
4322            LYCIAN,
4323            CARIAN,
4324            OLD_ITALIC,
4325            GOTHIC,
4326            UGARITIC,
4327            OLD_PERSIAN,
4328            DESERET,
4329            SHAVIAN,
4330            OSMANYA,
4331            CYPRIOT,
4332            IMPERIAL_ARAMAIC,
4333            PHOENICIAN,
4334            LYDIAN,
4335            MEROITIC_HIEROGLYPHS,
4336            MEROITIC_CURSIVE,
4337            KHAROSHTHI,
4338            OLD_SOUTH_ARABIAN,
4339            AVESTAN,
4340            INSCRIPTIONAL_PARTHIAN,
4341            INSCRIPTIONAL_PAHLAVI,
4342            OLD_TURKIC,
4343            ARABIC,
4344            BRAHMI,
4345            KAITHI,
4346            SORA_SOMPENG,
4347            CHAKMA,
4348            SHARADA,
4349            TAKRI,
4350            CUNEIFORM,
4351            EGYPTIAN_HIEROGLYPHS,
4352            BAMUM,
4353            MIAO,
4354            KATAKANA,
4355            HIRAGANA,
4356            COMMON,
4357            INHERITED,
4358            COMMON,
4359            INHERITED,
4360            COMMON,
4361            INHERITED,
4362            COMMON,
4363            INHERITED,
4364            COMMON,
4365            GREEK,
4366            COMMON,
4367            ARABIC,
4368            COMMON,
4369            HIRAGANA,
4370            COMMON,
4371            HAN,
4372            COMMON,
4373            INHERITED,
4374            UNKNOWN
4375        };
4376
4377        private static HashMap<String, Character.UnicodeScript> aliases;
4378        static {
4379            aliases = new HashMap<>(128);
4380            aliases.put("ARAB", ARABIC);
4381            aliases.put("ARMI", IMPERIAL_ARAMAIC);
4382            aliases.put("ARMN", ARMENIAN);
4383            aliases.put("AVST", AVESTAN);
4384            aliases.put("BALI", BALINESE);
4385            aliases.put("BAMU", BAMUM);
4386            aliases.put("BATK", BATAK);
4387            aliases.put("BENG", BENGALI);
4388            aliases.put("BOPO", BOPOMOFO);
4389            aliases.put("BRAI", BRAILLE);
4390            aliases.put("BRAH", BRAHMI);
4391            aliases.put("BUGI", BUGINESE);
4392            aliases.put("BUHD", BUHID);
4393            aliases.put("CAKM", CHAKMA);
4394            aliases.put("CANS", CANADIAN_ABORIGINAL);
4395            aliases.put("CARI", CARIAN);
4396            aliases.put("CHAM", CHAM);
4397            aliases.put("CHER", CHEROKEE);
4398            aliases.put("COPT", COPTIC);
4399            aliases.put("CPRT", CYPRIOT);
4400            aliases.put("CYRL", CYRILLIC);
4401            aliases.put("DEVA", DEVANAGARI);
4402            aliases.put("DSRT", DESERET);
4403            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4404            aliases.put("ETHI", ETHIOPIC);
4405            aliases.put("GEOR", GEORGIAN);
4406            aliases.put("GLAG", GLAGOLITIC);
4407            aliases.put("GOTH", GOTHIC);
4408            aliases.put("GREK", GREEK);
4409            aliases.put("GUJR", GUJARATI);
4410            aliases.put("GURU", GURMUKHI);
4411            aliases.put("HANG", HANGUL);
4412            aliases.put("HANI", HAN);
4413            aliases.put("HANO", HANUNOO);
4414            aliases.put("HEBR", HEBREW);
4415            aliases.put("HIRA", HIRAGANA);
4416            // it appears we don't have the KATAKANA_OR_HIRAGANA
4417            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4418            aliases.put("ITAL", OLD_ITALIC);
4419            aliases.put("JAVA", JAVANESE);
4420            aliases.put("KALI", KAYAH_LI);
4421            aliases.put("KANA", KATAKANA);
4422            aliases.put("KHAR", KHAROSHTHI);
4423            aliases.put("KHMR", KHMER);
4424            aliases.put("KNDA", KANNADA);
4425            aliases.put("KTHI", KAITHI);
4426            aliases.put("LANA", TAI_THAM);
4427            aliases.put("LAOO", LAO);
4428            aliases.put("LATN", LATIN);
4429            aliases.put("LEPC", LEPCHA);
4430            aliases.put("LIMB", LIMBU);
4431            aliases.put("LINB", LINEAR_B);
4432            aliases.put("LISU", LISU);
4433            aliases.put("LYCI", LYCIAN);
4434            aliases.put("LYDI", LYDIAN);
4435            aliases.put("MAND", MANDAIC);
4436            aliases.put("MERC", MEROITIC_CURSIVE);
4437            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4438            aliases.put("MLYM", MALAYALAM);
4439            aliases.put("MONG", MONGOLIAN);
4440            aliases.put("MTEI", MEETEI_MAYEK);
4441            aliases.put("MYMR", MYANMAR);
4442            aliases.put("NKOO", NKO);
4443            aliases.put("OGAM", OGHAM);
4444            aliases.put("OLCK", OL_CHIKI);
4445            aliases.put("ORKH", OLD_TURKIC);
4446            aliases.put("ORYA", ORIYA);
4447            aliases.put("OSMA", OSMANYA);
4448            aliases.put("PHAG", PHAGS_PA);
4449            aliases.put("PLRD", MIAO);
4450            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4451            aliases.put("PHNX", PHOENICIAN);
4452            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4453            aliases.put("RJNG", REJANG);
4454            aliases.put("RUNR", RUNIC);
4455            aliases.put("SAMR", SAMARITAN);
4456            aliases.put("SARB", OLD_SOUTH_ARABIAN);
4457            aliases.put("SAUR", SAURASHTRA);
4458            aliases.put("SHAW", SHAVIAN);
4459            aliases.put("SHRD", SHARADA);
4460            aliases.put("SINH", SINHALA);
4461            aliases.put("SORA", SORA_SOMPENG);
4462            aliases.put("SUND", SUNDANESE);
4463            aliases.put("SYLO", SYLOTI_NAGRI);
4464            aliases.put("SYRC", SYRIAC);
4465            aliases.put("TAGB", TAGBANWA);
4466            aliases.put("TALE", TAI_LE);
4467            aliases.put("TAKR", TAKRI);
4468            aliases.put("TALU", NEW_TAI_LUE);
4469            aliases.put("TAML", TAMIL);
4470            aliases.put("TAVT", TAI_VIET);
4471            aliases.put("TELU", TELUGU);
4472            aliases.put("TFNG", TIFINAGH);
4473            aliases.put("TGLG", TAGALOG);
4474            aliases.put("THAA", THAANA);
4475            aliases.put("THAI", THAI);
4476            aliases.put("TIBT", TIBETAN);
4477            aliases.put("UGAR", UGARITIC);
4478            aliases.put("VAII", VAI);
4479            aliases.put("XPEO", OLD_PERSIAN);
4480            aliases.put("XSUX", CUNEIFORM);
4481            aliases.put("YIII", YI);
4482            aliases.put("ZINH", INHERITED);
4483            aliases.put("ZYYY", COMMON);
4484            aliases.put("ZZZZ", UNKNOWN);
4485        }
4486
4487        /**
4488         * Returns the enum constant representing the Unicode script of which
4489         * the given character (Unicode code point) is assigned to.
4490         *
4491         * @param   codePoint the character (Unicode code point) in question.
4492         * @return  The {@code UnicodeScript} constant representing the
4493         *          Unicode script of which this character is assigned to.
4494         *
4495         * @exception IllegalArgumentException if the specified
4496         * {@code codePoint} is an invalid Unicode code point.
4497         * @see Character#isValidCodePoint(int)
4498         *
4499         */
4500        public static UnicodeScript of(int codePoint) {
4501            if (!isValidCodePoint(codePoint))
4502                throw new IllegalArgumentException();
4503            int type = getType(codePoint);
4504            // leave SURROGATE and PRIVATE_USE for table lookup
4505            if (type == UNASSIGNED)
4506                return UNKNOWN;
4507            int index = Arrays.binarySearch(scriptStarts, codePoint);
4508            if (index < 0)
4509                index = -index - 2;
4510            return scripts[index];
4511        }
4512
4513        /**
4514         * Returns the UnicodeScript constant with the given Unicode script
4515         * name or the script name alias. Script names and their aliases are
4516         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4517         * and PropertyValueAliases&lt;version&gt;.txt define script names
4518         * and the script name aliases for a particular version of the
4519         * standard. The {@link Character} class specifies the version of
4520         * the standard that it supports.
4521         * <p>
4522         * Character case is ignored for all of the valid script names.
4523         * The en_US locale's case mapping rules are used to provide
4524         * case-insensitive string comparisons for script name validation.
4525         * <p>
4526         *
4527         * @param scriptName A {@code UnicodeScript} name.
4528         * @return The {@code UnicodeScript} constant identified
4529         *         by {@code scriptName}
4530         * @throws IllegalArgumentException if {@code scriptName} is an
4531         *         invalid name
4532         * @throws NullPointerException if {@code scriptName} is null
4533         */
4534        public static final UnicodeScript forName(String scriptName) {
4535            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4536                                 //.replace(' ', '_'));
4537            UnicodeScript sc = aliases.get(scriptName);
4538            if (sc != null)
4539                return sc;
4540            return valueOf(scriptName);
4541        }
4542    }
4543
4544    /**
4545     * The value of the {@code Character}.
4546     *
4547     * @serial
4548     */
4549    private final char value;
4550
4551    /** use serialVersionUID from JDK 1.0.2 for interoperability */
4552    private static final long serialVersionUID = 3786198910865385080L;
4553
4554    /**
4555     * Constructs a newly allocated {@code Character} object that
4556     * represents the specified {@code char} value.
4557     *
4558     * @param  value   the value to be represented by the
4559     *                  {@code Character} object.
4560     */
4561    public Character(char value) {
4562        this.value = value;
4563    }
4564
4565    private static class CharacterCache {
4566        private CharacterCache(){}
4567
4568        static final Character cache[] = new Character[127 + 1];
4569
4570        static {
4571            for (int i = 0; i < cache.length; i++)
4572                cache[i] = new Character((char)i);
4573        }
4574    }
4575
4576    /**
4577     * Returns a <tt>Character</tt> instance representing the specified
4578     * <tt>char</tt> value.
4579     * If a new <tt>Character</tt> instance is not required, this method
4580     * should generally be used in preference to the constructor
4581     * {@link #Character(char)}, as this method is likely to yield
4582     * significantly better space and time performance by caching
4583     * frequently requested values.
4584     *
4585     * This method will always cache values in the range {@code
4586     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4587     * cache other values outside of this range.
4588     *
4589     * @param  c a char value.
4590     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4591     * @since  1.5
4592     */
4593    public static Character valueOf(char c) {
4594        if (c <= 127) { // must cache
4595            return CharacterCache.cache[(int)c];
4596        }
4597        return new Character(c);
4598    }
4599
4600    /**
4601     * Returns the value of this {@code Character} object.
4602     * @return  the primitive {@code char} value represented by
4603     *          this object.
4604     */
4605    public char charValue() {
4606        return value;
4607    }
4608
4609    /**
4610     * Returns a hash code for this {@code Character}; equal to the result
4611     * of invoking {@code charValue()}.
4612     *
4613     * @return a hash code value for this {@code Character}
4614     */
4615    @Override
4616    public int hashCode() {
4617        return Character.hashCode(value);
4618    }
4619
4620    /**
4621     * Returns a hash code for a {@code char} value; compatible with
4622     * {@code Character.hashCode()}.
4623     *
4624     * @since 1.8
4625     *
4626     * @param value The {@code char} for which to return a hash code.
4627     * @return a hash code value for a {@code char} value.
4628     */
4629    public static int hashCode(char value) {
4630        return (int)value;
4631    }
4632
4633    /**
4634     * Compares this object against the specified object.
4635     * The result is {@code true} if and only if the argument is not
4636     * {@code null} and is a {@code Character} object that
4637     * represents the same {@code char} value as this object.
4638     *
4639     * @param   obj   the object to compare with.
4640     * @return  {@code true} if the objects are the same;
4641     *          {@code false} otherwise.
4642     */
4643    public boolean equals(Object obj) {
4644        if (obj instanceof Character) {
4645            return value == ((Character)obj).charValue();
4646        }
4647        return false;
4648    }
4649
4650    /**
4651     * Returns a {@code String} object representing this
4652     * {@code Character}'s value.  The result is a string of
4653     * length 1 whose sole component is the primitive
4654     * {@code char} value represented by this
4655     * {@code Character} object.
4656     *
4657     * @return  a string representation of this object.
4658     */
4659    public String toString() {
4660        char buf[] = {value};
4661        return String.valueOf(buf);
4662    }
4663
4664    /**
4665     * Returns a {@code String} object representing the
4666     * specified {@code char}.  The result is a string of length
4667     * 1 consisting solely of the specified {@code char}.
4668     *
4669     * @param c the {@code char} to be converted
4670     * @return the string representation of the specified {@code char}
4671     * @since 1.4
4672     */
4673    public static String toString(char c) {
4674        return String.valueOf(c);
4675    }
4676
4677    /**
4678     * Determines whether the specified code point is a valid
4679     * <a href="http://www.unicode.org/glossary/#code_point">
4680     * Unicode code point value</a>.
4681     *
4682     * @param  codePoint the Unicode code point to be tested
4683     * @return {@code true} if the specified code point value is between
4684     *         {@link #MIN_CODE_POINT} and
4685     *         {@link #MAX_CODE_POINT} inclusive;
4686     *         {@code false} otherwise.
4687     * @since  1.5
4688     */
4689    public static boolean isValidCodePoint(int codePoint) {
4690        // Optimized form of:
4691        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4692        int plane = codePoint >>> 16;
4693        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4694    }
4695
4696    /**
4697     * Determines whether the specified character (Unicode code point)
4698     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4699     * Such code points can be represented using a single {@code char}.
4700     *
4701     * @param  codePoint the character (Unicode code point) to be tested
4702     * @return {@code true} if the specified code point is between
4703     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4704     *         {@code false} otherwise.
4705     * @since  1.7
4706     */
4707    public static boolean isBmpCodePoint(int codePoint) {
4708        return codePoint >>> 16 == 0;
4709        // Optimized form of:
4710        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4711        // We consistently use logical shift (>>>) to facilitate
4712        // additional runtime optimizations.
4713    }
4714
4715    /**
4716     * Determines whether the specified character (Unicode code point)
4717     * is in the <a href="#supplementary">supplementary character</a> range.
4718     *
4719     * @param  codePoint the character (Unicode code point) to be tested
4720     * @return {@code true} if the specified code point is between
4721     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4722     *         {@link #MAX_CODE_POINT} inclusive;
4723     *         {@code false} otherwise.
4724     * @since  1.5
4725     */
4726    public static boolean isSupplementaryCodePoint(int codePoint) {
4727        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4728            && codePoint <  MAX_CODE_POINT + 1;
4729    }
4730
4731    /**
4732     * Determines if the given {@code char} value is a
4733     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4734     * Unicode high-surrogate code unit</a>
4735     * (also known as <i>leading-surrogate code unit</i>).
4736     *
4737     * <p>Such values do not represent characters by themselves,
4738     * but are used in the representation of
4739     * <a href="#supplementary">supplementary characters</a>
4740     * in the UTF-16 encoding.
4741     *
4742     * @param  ch the {@code char} value to be tested.
4743     * @return {@code true} if the {@code char} value is between
4744     *         {@link #MIN_HIGH_SURROGATE} and
4745     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4746     *         {@code false} otherwise.
4747     * @see    Character#isLowSurrogate(char)
4748     * @see    Character.UnicodeBlock#of(int)
4749     * @since  1.5
4750     */
4751    public static boolean isHighSurrogate(char ch) {
4752        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4753        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4754    }
4755
4756    /**
4757     * Determines if the given {@code char} value is a
4758     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4759     * Unicode low-surrogate code unit</a>
4760     * (also known as <i>trailing-surrogate code unit</i>).
4761     *
4762     * <p>Such values do not represent characters by themselves,
4763     * but are used in the representation of
4764     * <a href="#supplementary">supplementary characters</a>
4765     * in the UTF-16 encoding.
4766     *
4767     * @param  ch the {@code char} value to be tested.
4768     * @return {@code true} if the {@code char} value is between
4769     *         {@link #MIN_LOW_SURROGATE} and
4770     *         {@link #MAX_LOW_SURROGATE} inclusive;
4771     *         {@code false} otherwise.
4772     * @see    Character#isHighSurrogate(char)
4773     * @since  1.5
4774     */
4775    public static boolean isLowSurrogate(char ch) {
4776        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4777    }
4778
4779    /**
4780     * Determines if the given {@code char} value is a Unicode
4781     * <i>surrogate code unit</i>.
4782     *
4783     * <p>Such values do not represent characters by themselves,
4784     * but are used in the representation of
4785     * <a href="#supplementary">supplementary characters</a>
4786     * in the UTF-16 encoding.
4787     *
4788     * <p>A char value is a surrogate code unit if and only if it is either
4789     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4790     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4791     *
4792     * @param  ch the {@code char} value to be tested.
4793     * @return {@code true} if the {@code char} value is between
4794     *         {@link #MIN_SURROGATE} and
4795     *         {@link #MAX_SURROGATE} inclusive;
4796     *         {@code false} otherwise.
4797     * @since  1.7
4798     */
4799    public static boolean isSurrogate(char ch) {
4800        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4801    }
4802
4803    /**
4804     * Determines whether the specified pair of {@code char}
4805     * values is a valid
4806     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4807     * Unicode surrogate pair</a>.
4808
4809     * <p>This method is equivalent to the expression:
4810     * <blockquote><pre>{@code
4811     * isHighSurrogate(high) && isLowSurrogate(low)
4812     * }</pre></blockquote>
4813     *
4814     * @param  high the high-surrogate code value to be tested
4815     * @param  low the low-surrogate code value to be tested
4816     * @return {@code true} if the specified high and
4817     * low-surrogate code values represent a valid surrogate pair;
4818     * {@code false} otherwise.
4819     * @since  1.5
4820     */
4821    public static boolean isSurrogatePair(char high, char low) {
4822        return isHighSurrogate(high) && isLowSurrogate(low);
4823    }
4824
4825    /**
4826     * Determines the number of {@code char} values needed to
4827     * represent the specified character (Unicode code point). If the
4828     * specified character is equal to or greater than 0x10000, then
4829     * the method returns 2. Otherwise, the method returns 1.
4830     *
4831     * <p>This method doesn't validate the specified character to be a
4832     * valid Unicode code point. The caller must validate the
4833     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4834     * if necessary.
4835     *
4836     * @param   codePoint the character (Unicode code point) to be tested.
4837     * @return  2 if the character is a valid supplementary character; 1 otherwise.
4838     * @see     Character#isSupplementaryCodePoint(int)
4839     * @since   1.5
4840     */
4841    public static int charCount(int codePoint) {
4842        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4843    }
4844
4845    /**
4846     * Converts the specified surrogate pair to its supplementary code
4847     * point value. This method does not validate the specified
4848     * surrogate pair. The caller must validate it using {@link
4849     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4850     *
4851     * @param  high the high-surrogate code unit
4852     * @param  low the low-surrogate code unit
4853     * @return the supplementary code point composed from the
4854     *         specified surrogate pair.
4855     * @since  1.5
4856     */
4857    public static int toCodePoint(char high, char low) {
4858        // Optimized form of:
4859        // return ((high - MIN_HIGH_SURROGATE) << 10)
4860        //         + (low - MIN_LOW_SURROGATE)
4861        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4862        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4863                                       - (MIN_HIGH_SURROGATE << 10)
4864                                       - MIN_LOW_SURROGATE);
4865    }
4866
4867    /**
4868     * Returns the code point at the given index of the
4869     * {@code CharSequence}. If the {@code char} value at
4870     * the given index in the {@code CharSequence} is in the
4871     * high-surrogate range, the following index is less than the
4872     * length of the {@code CharSequence}, and the
4873     * {@code char} value at the following index is in the
4874     * low-surrogate range, then the supplementary code point
4875     * corresponding to this surrogate pair is returned. Otherwise,
4876     * the {@code char} value at the given index is returned.
4877     *
4878     * @param seq a sequence of {@code char} values (Unicode code
4879     * units)
4880     * @param index the index to the {@code char} values (Unicode
4881     * code units) in {@code seq} to be converted
4882     * @return the Unicode code point at the given index
4883     * @exception NullPointerException if {@code seq} is null.
4884     * @exception IndexOutOfBoundsException if the value
4885     * {@code index} is negative or not less than
4886     * {@link CharSequence#length() seq.length()}.
4887     * @since  1.5
4888     */
4889    public static int codePointAt(CharSequence seq, int index) {
4890        char c1 = seq.charAt(index);
4891        if (isHighSurrogate(c1) && ++index < seq.length()) {
4892            char c2 = seq.charAt(index);
4893            if (isLowSurrogate(c2)) {
4894                return toCodePoint(c1, c2);
4895            }
4896        }
4897        return c1;
4898    }
4899
4900    /**
4901     * Returns the code point at the given index of the
4902     * {@code char} array. If the {@code char} value at
4903     * the given index in the {@code char} array is in the
4904     * high-surrogate range, the following index is less than the
4905     * length of the {@code char} array, and the
4906     * {@code char} value at the following index is in the
4907     * low-surrogate range, then the supplementary code point
4908     * corresponding to this surrogate pair is returned. Otherwise,
4909     * the {@code char} value at the given index is returned.
4910     *
4911     * @param a the {@code char} array
4912     * @param index the index to the {@code char} values (Unicode
4913     * code units) in the {@code char} array to be converted
4914     * @return the Unicode code point at the given index
4915     * @exception NullPointerException if {@code a} is null.
4916     * @exception IndexOutOfBoundsException if the value
4917     * {@code index} is negative or not less than
4918     * the length of the {@code char} array.
4919     * @since  1.5
4920     */
4921    public static int codePointAt(char[] a, int index) {
4922        return codePointAtImpl(a, index, a.length);
4923    }
4924
4925    /**
4926     * Returns the code point at the given index of the
4927     * {@code char} array, where only array elements with
4928     * {@code index} less than {@code limit} can be used. If
4929     * the {@code char} value at the given index in the
4930     * {@code char} array is in the high-surrogate range, the
4931     * following index is less than the {@code limit}, and the
4932     * {@code char} value at the following index is in the
4933     * low-surrogate range, then the supplementary code point
4934     * corresponding to this surrogate pair is returned. Otherwise,
4935     * the {@code char} value at the given index is returned.
4936     *
4937     * @param a the {@code char} array
4938     * @param index the index to the {@code char} values (Unicode
4939     * code units) in the {@code char} array to be converted
4940     * @param limit the index after the last array element that
4941     * can be used in the {@code char} array
4942     * @return the Unicode code point at the given index
4943     * @exception NullPointerException if {@code a} is null.
4944     * @exception IndexOutOfBoundsException if the {@code index}
4945     * argument is negative or not less than the {@code limit}
4946     * argument, or if the {@code limit} argument is negative or
4947     * greater than the length of the {@code char} array.
4948     * @since  1.5
4949     */
4950    public static int codePointAt(char[] a, int index, int limit) {
4951        if (index >= limit || limit < 0 || limit > a.length) {
4952            throw new IndexOutOfBoundsException();
4953        }
4954        return codePointAtImpl(a, index, limit);
4955    }
4956
4957    // throws ArrayIndexOutOfBoundsException if index out of bounds
4958    static int codePointAtImpl(char[] a, int index, int limit) {
4959        char c1 = a[index];
4960        if (isHighSurrogate(c1) && ++index < limit) {
4961            char c2 = a[index];
4962            if (isLowSurrogate(c2)) {
4963                return toCodePoint(c1, c2);
4964            }
4965        }
4966        return c1;
4967    }
4968
4969    /**
4970     * Returns the code point preceding the given index of the
4971     * {@code CharSequence}. If the {@code char} value at
4972     * {@code (index - 1)} in the {@code CharSequence} is in
4973     * the low-surrogate range, {@code (index - 2)} is not
4974     * negative, and the {@code char} value at {@code (index - 2)}
4975     * in the {@code CharSequence} is in the
4976     * high-surrogate range, then the supplementary code point
4977     * corresponding to this surrogate pair is returned. Otherwise,
4978     * the {@code char} value at {@code (index - 1)} is
4979     * returned.
4980     *
4981     * @param seq the {@code CharSequence} instance
4982     * @param index the index following the code point that should be returned
4983     * @return the Unicode code point value before the given index.
4984     * @exception NullPointerException if {@code seq} is null.
4985     * @exception IndexOutOfBoundsException if the {@code index}
4986     * argument is less than 1 or greater than {@link
4987     * CharSequence#length() seq.length()}.
4988     * @since  1.5
4989     */
4990    public static int codePointBefore(CharSequence seq, int index) {
4991        char c2 = seq.charAt(--index);
4992        if (isLowSurrogate(c2) && index > 0) {
4993            char c1 = seq.charAt(--index);
4994            if (isHighSurrogate(c1)) {
4995                return toCodePoint(c1, c2);
4996            }
4997        }
4998        return c2;
4999    }
5000
5001    /**
5002     * Returns the code point preceding the given index of the
5003     * {@code char} array. If the {@code char} value at
5004     * {@code (index - 1)} in the {@code char} array is in
5005     * the low-surrogate range, {@code (index - 2)} is not
5006     * negative, and the {@code char} value at {@code (index - 2)}
5007     * in the {@code char} array is in the
5008     * high-surrogate range, then the supplementary code point
5009     * corresponding to this surrogate pair is returned. Otherwise,
5010     * the {@code char} value at {@code (index - 1)} is
5011     * returned.
5012     *
5013     * @param a the {@code char} array
5014     * @param index the index following the code point that should be returned
5015     * @return the Unicode code point value before the given index.
5016     * @exception NullPointerException if {@code a} is null.
5017     * @exception IndexOutOfBoundsException if the {@code index}
5018     * argument is less than 1 or greater than the length of the
5019     * {@code char} array
5020     * @since  1.5
5021     */
5022    public static int codePointBefore(char[] a, int index) {
5023        return codePointBeforeImpl(a, index, 0);
5024    }
5025
5026    /**
5027     * Returns the code point preceding the given index of the
5028     * {@code char} array, where only array elements with
5029     * {@code index} greater than or equal to {@code start}
5030     * can be used. If the {@code char} value at {@code (index - 1)}
5031     * in the {@code char} array is in the
5032     * low-surrogate range, {@code (index - 2)} is not less than
5033     * {@code start}, and the {@code char} value at
5034     * {@code (index - 2)} in the {@code char} array is in
5035     * the high-surrogate range, then the supplementary code point
5036     * corresponding to this surrogate pair is returned. Otherwise,
5037     * the {@code char} value at {@code (index - 1)} is
5038     * returned.
5039     *
5040     * @param a the {@code char} array
5041     * @param index the index following the code point that should be returned
5042     * @param start the index of the first array element in the
5043     * {@code char} array
5044     * @return the Unicode code point value before the given index.
5045     * @exception NullPointerException if {@code a} is null.
5046     * @exception IndexOutOfBoundsException if the {@code index}
5047     * argument is not greater than the {@code start} argument or
5048     * is greater than the length of the {@code char} array, or
5049     * if the {@code start} argument is negative or not less than
5050     * the length of the {@code char} array.
5051     * @since  1.5
5052     */
5053    public static int codePointBefore(char[] a, int index, int start) {
5054        if (index <= start || start < 0 || start >= a.length) {
5055            throw new IndexOutOfBoundsException();
5056        }
5057        return codePointBeforeImpl(a, index, start);
5058    }
5059
5060    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5061    static int codePointBeforeImpl(char[] a, int index, int start) {
5062        char c2 = a[--index];
5063        if (isLowSurrogate(c2) && index > start) {
5064            char c1 = a[--index];
5065            if (isHighSurrogate(c1)) {
5066                return toCodePoint(c1, c2);
5067            }
5068        }
5069        return c2;
5070    }
5071
5072    /**
5073     * Returns the leading surrogate (a
5074     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5075     * high surrogate code unit</a>) of the
5076     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5077     * surrogate pair</a>
5078     * representing the specified supplementary character (Unicode
5079     * code point) in the UTF-16 encoding.  If the specified character
5080     * is not a
5081     * <a href="Character.html#supplementary">supplementary character</a>,
5082     * an unspecified {@code char} is returned.
5083     *
5084     * <p>If
5085     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5086     * is {@code true}, then
5087     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5088     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5089     * are also always {@code true}.
5090     *
5091     * @param   codePoint a supplementary character (Unicode code point)
5092     * @return  the leading surrogate code unit used to represent the
5093     *          character in the UTF-16 encoding
5094     * @since   1.7
5095     */
5096    public static char highSurrogate(int codePoint) {
5097        return (char) ((codePoint >>> 10)
5098            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5099    }
5100
5101    /**
5102     * Returns the trailing surrogate (a
5103     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5104     * low surrogate code unit</a>) of the
5105     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5106     * surrogate pair</a>
5107     * representing the specified supplementary character (Unicode
5108     * code point) in the UTF-16 encoding.  If the specified character
5109     * is not a
5110     * <a href="Character.html#supplementary">supplementary character</a>,
5111     * an unspecified {@code char} is returned.
5112     *
5113     * <p>If
5114     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5115     * is {@code true}, then
5116     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5117     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5118     * are also always {@code true}.
5119     *
5120     * @param   codePoint a supplementary character (Unicode code point)
5121     * @return  the trailing surrogate code unit used to represent the
5122     *          character in the UTF-16 encoding
5123     * @since   1.7
5124     */
5125    public static char lowSurrogate(int codePoint) {
5126        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5127    }
5128
5129    /**
5130     * Converts the specified character (Unicode code point) to its
5131     * UTF-16 representation. If the specified code point is a BMP
5132     * (Basic Multilingual Plane or Plane 0) value, the same value is
5133     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5134     * specified code point is a supplementary character, its
5135     * surrogate values are stored in {@code dst[dstIndex]}
5136     * (high-surrogate) and {@code dst[dstIndex+1]}
5137     * (low-surrogate), and 2 is returned.
5138     *
5139     * @param  codePoint the character (Unicode code point) to be converted.
5140     * @param  dst an array of {@code char} in which the
5141     * {@code codePoint}'s UTF-16 value is stored.
5142     * @param dstIndex the start index into the {@code dst}
5143     * array where the converted value is stored.
5144     * @return 1 if the code point is a BMP code point, 2 if the
5145     * code point is a supplementary code point.
5146     * @exception IllegalArgumentException if the specified
5147     * {@code codePoint} is not a valid Unicode code point.
5148     * @exception NullPointerException if the specified {@code dst} is null.
5149     * @exception IndexOutOfBoundsException if {@code dstIndex}
5150     * is negative or not less than {@code dst.length}, or if
5151     * {@code dst} at {@code dstIndex} doesn't have enough
5152     * array element(s) to store the resulting {@code char}
5153     * value(s). (If {@code dstIndex} is equal to
5154     * {@code dst.length-1} and the specified
5155     * {@code codePoint} is a supplementary character, the
5156     * high-surrogate value is not stored in
5157     * {@code dst[dstIndex]}.)
5158     * @since  1.5
5159     */
5160    public static int toChars(int codePoint, char[] dst, int dstIndex) {
5161        if (isBmpCodePoint(codePoint)) {
5162            dst[dstIndex] = (char) codePoint;
5163            return 1;
5164        } else if (isValidCodePoint(codePoint)) {
5165            toSurrogates(codePoint, dst, dstIndex);
5166            return 2;
5167        } else {
5168            throw new IllegalArgumentException();
5169        }
5170    }
5171
5172    /**
5173     * Converts the specified character (Unicode code point) to its
5174     * UTF-16 representation stored in a {@code char} array. If
5175     * the specified code point is a BMP (Basic Multilingual Plane or
5176     * Plane 0) value, the resulting {@code char} array has
5177     * the same value as {@code codePoint}. If the specified code
5178     * point is a supplementary code point, the resulting
5179     * {@code char} array has the corresponding surrogate pair.
5180     *
5181     * @param  codePoint a Unicode code point
5182     * @return a {@code char} array having
5183     *         {@code codePoint}'s UTF-16 representation.
5184     * @exception IllegalArgumentException if the specified
5185     * {@code codePoint} is not a valid Unicode code point.
5186     * @since  1.5
5187     */
5188    public static char[] toChars(int codePoint) {
5189        if (isBmpCodePoint(codePoint)) {
5190            return new char[] { (char) codePoint };
5191        } else if (isValidCodePoint(codePoint)) {
5192            char[] result = new char[2];
5193            toSurrogates(codePoint, result, 0);
5194            return result;
5195        } else {
5196            throw new IllegalArgumentException();
5197        }
5198    }
5199
5200    static void toSurrogates(int codePoint, char[] dst, int index) {
5201        // We write elements "backwards" to guarantee all-or-nothing
5202        dst[index+1] = lowSurrogate(codePoint);
5203        dst[index] = highSurrogate(codePoint);
5204    }
5205
5206    /**
5207     * Returns the number of Unicode code points in the text range of
5208     * the specified char sequence. The text range begins at the
5209     * specified {@code beginIndex} and extends to the
5210     * {@code char} at index {@code endIndex - 1}. Thus the
5211     * length (in {@code char}s) of the text range is
5212     * {@code endIndex-beginIndex}. Unpaired surrogates within
5213     * the text range count as one code point each.
5214     *
5215     * @param seq the char sequence
5216     * @param beginIndex the index to the first {@code char} of
5217     * the text range.
5218     * @param endIndex the index after the last {@code char} of
5219     * the text range.
5220     * @return the number of Unicode code points in the specified text
5221     * range
5222     * @exception NullPointerException if {@code seq} is null.
5223     * @exception IndexOutOfBoundsException if the
5224     * {@code beginIndex} is negative, or {@code endIndex}
5225     * is larger than the length of the given sequence, or
5226     * {@code beginIndex} is larger than {@code endIndex}.
5227     * @since  1.5
5228     */
5229    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5230        int length = seq.length();
5231        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5232            throw new IndexOutOfBoundsException();
5233        }
5234        int n = endIndex - beginIndex;
5235        for (int i = beginIndex; i < endIndex; ) {
5236            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5237                isLowSurrogate(seq.charAt(i))) {
5238                n--;
5239                i++;
5240            }
5241        }
5242        return n;
5243    }
5244
5245    /**
5246     * Returns the number of Unicode code points in a subarray of the
5247     * {@code char} array argument. The {@code offset}
5248     * argument is the index of the first {@code char} of the
5249     * subarray and the {@code count} argument specifies the
5250     * length of the subarray in {@code char}s. Unpaired
5251     * surrogates within the subarray count as one code point each.
5252     *
5253     * @param a the {@code char} array
5254     * @param offset the index of the first {@code char} in the
5255     * given {@code char} array
5256     * @param count the length of the subarray in {@code char}s
5257     * @return the number of Unicode code points in the specified subarray
5258     * @exception NullPointerException if {@code a} is null.
5259     * @exception IndexOutOfBoundsException if {@code offset} or
5260     * {@code count} is negative, or if {@code offset +
5261     * count} is larger than the length of the given array.
5262     * @since  1.5
5263     */
5264    public static int codePointCount(char[] a, int offset, int count) {
5265        if (count > a.length - offset || offset < 0 || count < 0) {
5266            throw new IndexOutOfBoundsException();
5267        }
5268        return codePointCountImpl(a, offset, count);
5269    }
5270
5271    static int codePointCountImpl(char[] a, int offset, int count) {
5272        int endIndex = offset + count;
5273        int n = count;
5274        for (int i = offset; i < endIndex; ) {
5275            if (isHighSurrogate(a[i++]) && i < endIndex &&
5276                isLowSurrogate(a[i])) {
5277                n--;
5278                i++;
5279            }
5280        }
5281        return n;
5282    }
5283
5284    /**
5285     * Returns the index within the given char sequence that is offset
5286     * from the given {@code index} by {@code codePointOffset}
5287     * code points. Unpaired surrogates within the text range given by
5288     * {@code index} and {@code codePointOffset} count as
5289     * one code point each.
5290     *
5291     * @param seq the char sequence
5292     * @param index the index to be offset
5293     * @param codePointOffset the offset in code points
5294     * @return the index within the char sequence
5295     * @exception NullPointerException if {@code seq} is null.
5296     * @exception IndexOutOfBoundsException if {@code index}
5297     *   is negative or larger than the length of the char sequence,
5298     *   or if {@code codePointOffset} is positive and the
5299     *   subsequence starting with {@code index} has fewer than
5300     *   {@code codePointOffset} code points, or if
5301     *   {@code codePointOffset} is negative and the subsequence
5302     *   before {@code index} has fewer than the absolute value
5303     *   of {@code codePointOffset} code points.
5304     * @since 1.5
5305     */
5306    public static int offsetByCodePoints(CharSequence seq, int index,
5307                                         int codePointOffset) {
5308        int length = seq.length();
5309        if (index < 0 || index > length) {
5310            throw new IndexOutOfBoundsException();
5311        }
5312
5313        int x = index;
5314        if (codePointOffset >= 0) {
5315            int i;
5316            for (i = 0; x < length && i < codePointOffset; i++) {
5317                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5318                    isLowSurrogate(seq.charAt(x))) {
5319                    x++;
5320                }
5321            }
5322            if (i < codePointOffset) {
5323                throw new IndexOutOfBoundsException();
5324            }
5325        } else {
5326            int i;
5327            for (i = codePointOffset; x > 0 && i < 0; i++) {
5328                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5329                    isHighSurrogate(seq.charAt(x-1))) {
5330                    x--;
5331                }
5332            }
5333            if (i < 0) {
5334                throw new IndexOutOfBoundsException();
5335            }
5336        }
5337        return x;
5338    }
5339
5340    /**
5341     * Returns the index within the given {@code char} subarray
5342     * that is offset from the given {@code index} by
5343     * {@code codePointOffset} code points. The
5344     * {@code start} and {@code count} arguments specify a
5345     * subarray of the {@code char} array. Unpaired surrogates
5346     * within the text range given by {@code index} and
5347     * {@code codePointOffset} count as one code point each.
5348     *
5349     * @param a the {@code char} array
5350     * @param start the index of the first {@code char} of the
5351     * subarray
5352     * @param count the length of the subarray in {@code char}s
5353     * @param index the index to be offset
5354     * @param codePointOffset the offset in code points
5355     * @return the index within the subarray
5356     * @exception NullPointerException if {@code a} is null.
5357     * @exception IndexOutOfBoundsException
5358     *   if {@code start} or {@code count} is negative,
5359     *   or if {@code start + count} is larger than the length of
5360     *   the given array,
5361     *   or if {@code index} is less than {@code start} or
5362     *   larger than {@code start + count},
5363     *   or if {@code codePointOffset} is positive and the text range
5364     *   starting with {@code index} and ending with {@code start + count - 1}
5365     *   has fewer than {@code codePointOffset} code
5366     *   points,
5367     *   or if {@code codePointOffset} is negative and the text range
5368     *   starting with {@code start} and ending with {@code index - 1}
5369     *   has fewer than the absolute value of
5370     *   {@code codePointOffset} code points.
5371     * @since 1.5
5372     */
5373    public static int offsetByCodePoints(char[] a, int start, int count,
5374                                         int index, int codePointOffset) {
5375        if (count > a.length-start || start < 0 || count < 0
5376            || index < start || index > start+count) {
5377            throw new IndexOutOfBoundsException();
5378        }
5379        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5380    }
5381
5382    static int offsetByCodePointsImpl(char[]a, int start, int count,
5383                                      int index, int codePointOffset) {
5384        int x = index;
5385        if (codePointOffset >= 0) {
5386            int limit = start + count;
5387            int i;
5388            for (i = 0; x < limit && i < codePointOffset; i++) {
5389                if (isHighSurrogate(a[x++]) && x < limit &&
5390                    isLowSurrogate(a[x])) {
5391                    x++;
5392                }
5393            }
5394            if (i < codePointOffset) {
5395                throw new IndexOutOfBoundsException();
5396            }
5397        } else {
5398            int i;
5399            for (i = codePointOffset; x > start && i < 0; i++) {
5400                if (isLowSurrogate(a[--x]) && x > start &&
5401                    isHighSurrogate(a[x-1])) {
5402                    x--;
5403                }
5404            }
5405            if (i < 0) {
5406                throw new IndexOutOfBoundsException();
5407            }
5408        }
5409        return x;
5410    }
5411
5412    /**
5413     * Determines if the specified character is a lowercase character.
5414     * <p>
5415     * A character is lowercase if its general category type, provided
5416     * by {@code Character.getType(ch)}, is
5417     * {@code LOWERCASE_LETTER}, or it has contributory property
5418     * Other_Lowercase as defined by the Unicode Standard.
5419     * <p>
5420     * The following are examples of lowercase characters:
5421     * <blockquote><pre>
5422     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5423     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5424     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5425     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5426     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5427     * </pre></blockquote>
5428     * <p> Many other Unicode characters are lowercase too.
5429     *
5430     * <p><b>Note:</b> This method cannot handle <a
5431     * href="#supplementary"> supplementary characters</a>. To support
5432     * all Unicode characters, including supplementary characters, use
5433     * the {@link #isLowerCase(int)} method.
5434     *
5435     * @param   ch   the character to be tested.
5436     * @return  {@code true} if the character is lowercase;
5437     *          {@code false} otherwise.
5438     * @see     Character#isUpperCase(char)
5439     * @see     Character#isTitleCase(char)
5440     * @see     Character#toLowerCase(char)
5441     * @see     Character#getType(char)
5442     */
5443    public static boolean isLowerCase(char ch) {
5444        return isLowerCase((int)ch);
5445    }
5446
5447    /**
5448     * Determines if the specified character (Unicode code point) is a
5449     * lowercase character.
5450     * <p>
5451     * A character is lowercase if its general category type, provided
5452     * by {@link Character#getType getType(codePoint)}, is
5453     * {@code LOWERCASE_LETTER}, or it has contributory property
5454     * Other_Lowercase as defined by the Unicode Standard.
5455     * <p>
5456     * The following are examples of lowercase characters:
5457     * <blockquote><pre>
5458     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5459     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5460     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5461     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5462     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5463     * </pre></blockquote>
5464     * <p> Many other Unicode characters are lowercase too.
5465     *
5466     * @param   codePoint the character (Unicode code point) to be tested.
5467     * @return  {@code true} if the character is lowercase;
5468     *          {@code false} otherwise.
5469     * @see     Character#isUpperCase(int)
5470     * @see     Character#isTitleCase(int)
5471     * @see     Character#toLowerCase(int)
5472     * @see     Character#getType(int)
5473     * @since   1.5
5474     */
5475    public static boolean isLowerCase(int codePoint) {
5476        return isLowerCaseImpl(codePoint);
5477    }
5478
5479    @FastNative
        // Native counterpart of isLowerCase(int), which returns this method's
        // result unchanged. Declared native: no Java body exists in this file.
5480    static native boolean isLowerCaseImpl(int codePoint);
5481
5482    /**
5483     * Determines if the specified character is an uppercase character.
5484     * <p>
5485     * A character is uppercase if its general category type, provided by
5486     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5487     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5488     * <p>
5489     * The following are examples of uppercase characters:
5490     * <blockquote><pre>
5491     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5492     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5493     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5494     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5495     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5496     * </pre></blockquote>
5497     * <p> Many other Unicode characters are uppercase too.
5498     *
5499     * <p><b>Note:</b> This method cannot handle <a
5500     * href="#supplementary"> supplementary characters</a>. To support
5501     * all Unicode characters, including supplementary characters, use
5502     * the {@link #isUpperCase(int)} method.
5503     *
5504     * @param   ch   the character to be tested.
5505     * @return  {@code true} if the character is uppercase;
5506     *          {@code false} otherwise.
5507     * @see     Character#isLowerCase(char)
5508     * @see     Character#isTitleCase(char)
5509     * @see     Character#toUpperCase(char)
5510     * @see     Character#getType(char)
5511     * @since   1.0
5512     */
5513    public static boolean isUpperCase(char ch) {
5514        return isUpperCase((int)ch);
5515    }
5516
5517    /**
5518     * Determines if the specified character (Unicode code point) is an uppercase character.
5519     * <p>
5520     * A character is uppercase if its general category type, provided by
5521     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5522     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5523     * <p>
5524     * The following are examples of uppercase characters:
5525     * <blockquote><pre>
5526     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5527     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5528     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5529     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5530     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5531     * </pre></blockquote>
5532     * <p> Many other Unicode characters are uppercase too.
5533     *
5534     * @param   codePoint the character (Unicode code point) to be tested.
5535     * @return  {@code true} if the character is uppercase;
5536     *          {@code false} otherwise.
5537     * @see     Character#isLowerCase(int)
5538     * @see     Character#isTitleCase(int)
5539     * @see     Character#toUpperCase(int)
5540     * @see     Character#getType(int)
5541     * @since   1.5
5542     */
5543    public static boolean isUpperCase(int codePoint) {
5544        return isUpperCaseImpl(codePoint);
5545    }
5546
5547    @FastNative
        // Native counterpart of isUpperCase(int), which returns this method's
        // result unchanged. Declared native: no Java body exists in this file.
5548    static native boolean isUpperCaseImpl(int codePoint);
5549
5550
5551    /**
5552     * Determines if the specified character is a titlecase character.
5553     * <p>
5554     * A character is a titlecase character if its general
5555     * category type, provided by {@code Character.getType(ch)},
5556     * is {@code TITLECASE_LETTER}.
5557     * <p>
5558     * Some characters look like pairs of Latin letters. For example, there
5559     * is an uppercase letter that looks like "LJ" and has a corresponding
5560     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5561     * is the appropriate form to use when rendering a word in lowercase
5562     * with initial capitals, as for a book title.
5563     * <p>
5564     * These are some of the Unicode characters for which this method returns
5565     * {@code true}:
5566     * <ul>
5567     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5568     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5569     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5570     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5571     * </ul>
5572     * <p> Many other Unicode characters are titlecase too.
5573     *
5574     * <p><b>Note:</b> This method cannot handle <a
5575     * href="#supplementary"> supplementary characters</a>. To support
5576     * all Unicode characters, including supplementary characters, use
5577     * the {@link #isTitleCase(int)} method.
5578     *
5579     * @param   ch   the character to be tested.
5580     * @return  {@code true} if the character is titlecase;
5581     *          {@code false} otherwise.
5582     * @see     Character#isLowerCase(char)
5583     * @see     Character#isUpperCase(char)
5584     * @see     Character#toTitleCase(char)
5585     * @see     Character#getType(char)
5586     * @since   1.0.2
5587     */
5588    public static boolean isTitleCase(char ch) {
5589        return isTitleCase((int)ch);
5590    }
5591
5592    /**
5593     * Determines if the specified character (Unicode code point) is a titlecase character.
5594     * <p>
5595     * A character is a titlecase character if its general
5596     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5597     * is {@code TITLECASE_LETTER}.
5598     * <p>
5599     * Some characters look like pairs of Latin letters. For example, there
5600     * is an uppercase letter that looks like "LJ" and has a corresponding
5601     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5602     * is the appropriate form to use when rendering a word in lowercase
5603     * with initial capitals, as for a book title.
5604     * <p>
5605     * These are some of the Unicode characters for which this method returns
5606     * {@code true}:
5607     * <ul>
5608     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5609     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5610     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5611     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5612     * </ul>
5613     * <p> Many other Unicode characters are titlecase too.
5614     *
5615     * @param   codePoint the character (Unicode code point) to be tested.
5616     * @return  {@code true} if the character is titlecase;
5617     *          {@code false} otherwise.
5618     * @see     Character#isLowerCase(int)
5619     * @see     Character#isUpperCase(int)
5620     * @see     Character#toTitleCase(int)
5621     * @see     Character#getType(int)
5622     * @since   1.5
5623     */
5624    public static boolean isTitleCase(int codePoint) {
5625        return isTitleCaseImpl(codePoint);
5626    }
5627
5628    @FastNative
        // Native counterpart of isTitleCase(int), which returns this method's
        // result unchanged. Declared native: no Java body exists in this file.
5629    static native boolean isTitleCaseImpl(int codePoint);
5630
5631    /**
5632     * Determines if the specified character is a digit.
5633     * <p>
5634     * A character is a digit if its general category type, provided
5635     * by {@code Character.getType(ch)}, is
5636     * {@code DECIMAL_DIGIT_NUMBER}.
5637     * <p>
5638     * Some Unicode character ranges that contain digits:
5639     * <ul>
5640     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5641     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5642     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5643     *     Arabic-Indic digits
5644     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5645     *     Extended Arabic-Indic digits
5646     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5647     *     Devanagari digits
5648     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5649     *     Fullwidth digits
5650     * </ul>
5651     *
5652     * Many other character ranges contain digits as well.
5653     *
5654     * <p><b>Note:</b> This method cannot handle <a
5655     * href="#supplementary"> supplementary characters</a>. To support
5656     * all Unicode characters, including supplementary characters, use
5657     * the {@link #isDigit(int)} method.
5658     *
5659     * @param   ch   the character to be tested.
5660     * @return  {@code true} if the character is a digit;
5661     *          {@code false} otherwise.
5662     * @see     Character#digit(char, int)
5663     * @see     Character#forDigit(int, int)
5664     * @see     Character#getType(char)
5665     */
5666    public static boolean isDigit(char ch) {
5667        return isDigit((int)ch);
5668    }
5669
5670    /**
5671     * Determines if the specified character (Unicode code point) is a digit.
5672     * <p>
5673     * A character is a digit if its general category type, provided
5674     * by {@link Character#getType(int) getType(codePoint)}, is
5675     * {@code DECIMAL_DIGIT_NUMBER}.
5676     * <p>
5677     * Some Unicode character ranges that contain digits:
5678     * <ul>
5679     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5680     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5681     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5682     *     Arabic-Indic digits
5683     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5684     *     Extended Arabic-Indic digits
5685     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5686     *     Devanagari digits
5687     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5688     *     Fullwidth digits
5689     * </ul>
5690     *
5691     * Many other character ranges contain digits as well.
5692     *
5693     * @param   codePoint the character (Unicode code point) to be tested.
5694     * @return  {@code true} if the character is a digit;
5695     *          {@code false} otherwise.
5696     * @see     Character#forDigit(int, int)
5697     * @see     Character#getType(int)
5698     * @since   1.5
5699     */
5700    public static boolean isDigit(int codePoint) {
5701        return isDigitImpl(codePoint);
5702    }
5703
5704    @FastNative
        // Native counterpart of isDigit(int), which returns this method's
        // result unchanged. Declared native: no Java body exists in this file.
5705    static native boolean isDigitImpl(int codePoint);
5706
5707    /**
5708     * Determines if a character is defined in Unicode.
5709     * <p>
5710     * A character is defined if at least one of the following is true:
5711     * <ul>
5712     * <li>It has an entry in the UnicodeData file.
5713     * <li>It has a value in a range defined by the UnicodeData file.
5714     * </ul>
5715     *
5716     * <p><b>Note:</b> This method cannot handle <a
5717     * href="#supplementary"> supplementary characters</a>. To support
5718     * all Unicode characters, including supplementary characters, use
5719     * the {@link #isDefined(int)} method.
5720     *
5721     * @param   ch   the character to be tested
5722     * @return  {@code true} if the character has a defined meaning
5723     *          in Unicode; {@code false} otherwise.
5724     * @see     Character#isDigit(char)
5725     * @see     Character#isLetter(char)
5726     * @see     Character#isLetterOrDigit(char)
5727     * @see     Character#isLowerCase(char)
5728     * @see     Character#isTitleCase(char)
5729     * @see     Character#isUpperCase(char)
5730     * @since   1.0.2
5731     */
5732    public static boolean isDefined(char ch) {
5733        return isDefined((int)ch);
5734    }
5735
5736    /**
5737     * Determines if a character (Unicode code point) is defined in Unicode.
5738     * <p>
5739     * A character is defined if at least one of the following is true:
5740     * <ul>
5741     * <li>It has an entry in the UnicodeData file.
5742     * <li>It has a value in a range defined by the UnicodeData file.
5743     * </ul>
5744     *
5745     * @param   codePoint the character (Unicode code point) to be tested.
5746     * @return  {@code true} if the character has a defined meaning
5747     *          in Unicode; {@code false} otherwise.
5748     * @see     Character#isDigit(int)
5749     * @see     Character#isLetter(int)
5750     * @see     Character#isLetterOrDigit(int)
5751     * @see     Character#isLowerCase(int)
5752     * @see     Character#isTitleCase(int)
5753     * @see     Character#isUpperCase(int)
5754     * @since   1.5
5755     */
5756    public static boolean isDefined(int codePoint) {
5757        return isDefinedImpl(codePoint);
5758    }
5759
5760    @FastNative
5761    static native boolean isDefinedImpl(int codePoint);
5762
5763    /**
5764     * Determines if the specified character is a letter.
5765     * <p>
5766     * A character is considered to be a letter if its general
5767     * category type, provided by {@code Character.getType(ch)},
5768     * is any of the following:
5769     * <ul>
5770     * <li> {@code UPPERCASE_LETTER}
5771     * <li> {@code LOWERCASE_LETTER}
5772     * <li> {@code TITLECASE_LETTER}
5773     * <li> {@code MODIFIER_LETTER}
5774     * <li> {@code OTHER_LETTER}
5775     * </ul>
5776     *
5777     * Not all letters have case. Many characters are
5778     * letters but are neither uppercase nor lowercase nor titlecase.
5779     *
5780     * <p><b>Note:</b> This method cannot handle <a
5781     * href="#supplementary"> supplementary characters</a>. To support
5782     * all Unicode characters, including supplementary characters, use
5783     * the {@link #isLetter(int)} method.
5784     *
5785     * @param   ch   the character to be tested.
5786     * @return  {@code true} if the character is a letter;
5787     *          {@code false} otherwise.
5788     * @see     Character#isDigit(char)
5789     * @see     Character#isJavaIdentifierStart(char)
5790     * @see     Character#isJavaLetter(char)
5791     * @see     Character#isJavaLetterOrDigit(char)
5792     * @see     Character#isLetterOrDigit(char)
5793     * @see     Character#isLowerCase(char)
5794     * @see     Character#isTitleCase(char)
5795     * @see     Character#isUnicodeIdentifierStart(char)
5796     * @see     Character#isUpperCase(char)
5797     */
5798    public static boolean isLetter(char ch) {
5799        return isLetter((int)ch);
5800    }
5801
5802    /**
5803     * Determines if the specified character (Unicode code point) is a letter.
5804     * <p>
5805     * A character is considered to be a letter if its general
5806     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5807     * is any of the following:
5808     * <ul>
5809     * <li> {@code UPPERCASE_LETTER}
5810     * <li> {@code LOWERCASE_LETTER}
5811     * <li> {@code TITLECASE_LETTER}
5812     * <li> {@code MODIFIER_LETTER}
5813     * <li> {@code OTHER_LETTER}
5814     * </ul>
5815     *
5816     * Not all letters have case. Many characters are
5817     * letters but are neither uppercase nor lowercase nor titlecase.
5818     *
5819     * @param   codePoint the character (Unicode code point) to be tested.
5820     * @return  {@code true} if the character is a letter;
5821     *          {@code false} otherwise.
5822     * @see     Character#isDigit(int)
5823     * @see     Character#isJavaIdentifierStart(int)
5824     * @see     Character#isLetterOrDigit(int)
5825     * @see     Character#isLowerCase(int)
5826     * @see     Character#isTitleCase(int)
5827     * @see     Character#isUnicodeIdentifierStart(int)
5828     * @see     Character#isUpperCase(int)
5829     * @since   1.5
5830     */
5831    public static boolean isLetter(int codePoint) {
5832        return isLetterImpl(codePoint);
5833    }
5834
5835    @FastNative
5836    static native boolean isLetterImpl(int codePoint);
5837
5838    /**
5839     * Determines if the specified character is a letter or digit.
5840     * <p>
5841     * A character is considered to be a letter or digit if either
5842     * {@code Character.isLetter(char ch)} or
5843     * {@code Character.isDigit(char ch)} returns
5844     * {@code true} for the character.
5845     *
5846     * <p><b>Note:</b> This method cannot handle <a
5847     * href="#supplementary"> supplementary characters</a>. To support
5848     * all Unicode characters, including supplementary characters, use
5849     * the {@link #isLetterOrDigit(int)} method.
5850     *
5851     * @param   ch   the character to be tested.
5852     * @return  {@code true} if the character is a letter or digit;
5853     *          {@code false} otherwise.
5854     * @see     Character#isDigit(char)
5855     * @see     Character#isJavaIdentifierPart(char)
5856     * @see     Character#isJavaLetter(char)
5857     * @see     Character#isJavaLetterOrDigit(char)
5858     * @see     Character#isLetter(char)
5859     * @see     Character#isUnicodeIdentifierPart(char)
5860     * @since   1.0.2
5861     */
5862    public static boolean isLetterOrDigit(char ch) {
5863        return isLetterOrDigit((int)ch);
5864    }
5865
5866    /**
5867     * Determines if the specified character (Unicode code point) is a letter or digit.
5868     * <p>
5869     * A character is considered to be a letter or digit if either
5870     * {@link #isLetter(int) isLetter(codePoint)} or
5871     * {@link #isDigit(int) isDigit(codePoint)} returns
5872     * {@code true} for the character.
5873     *
5874     * @param   codePoint the character (Unicode code point) to be tested.
5875     * @return  {@code true} if the character is a letter or digit;
5876     *          {@code false} otherwise.
5877     * @see     Character#isDigit(int)
5878     * @see     Character#isJavaIdentifierPart(int)
5879     * @see     Character#isLetter(int)
5880     * @see     Character#isUnicodeIdentifierPart(int)
5881     * @since   1.5
5882     */
5883    public static boolean isLetterOrDigit(int codePoint) {
5884        return isLetterOrDigitImpl(codePoint);
5885    }
5886
5887    @FastNative
5888    static native boolean isLetterOrDigitImpl(int codePoint);
5889
5890    /**
5891     * Determines if the specified character is permissible as the first
5892     * character in a Java identifier.
5893     * <p>
5894     * A character may start a Java identifier if and only if
5895     * one of the following is true:
5896     * <ul>
5897     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5898     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5899     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5900     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5901     * </ul>
5902     *
5903     * @param   ch the character to be tested.
5904     * @return  {@code true} if the character may start a Java
5905     *          identifier; {@code false} otherwise.
5906     * @see     Character#isJavaLetterOrDigit(char)
5907     * @see     Character#isJavaIdentifierStart(char)
5908     * @see     Character#isJavaIdentifierPart(char)
5909     * @see     Character#isLetter(char)
5910     * @see     Character#isLetterOrDigit(char)
5911     * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5913     * @deprecated Replaced by isJavaIdentifierStart(char).
5914     */
5915    @Deprecated
5916    public static boolean isJavaLetter(char ch) {
5917        return isJavaIdentifierStart(ch);
5918    }
5919
5920    /**
5921     * Determines if the specified character may be part of a Java
5922     * identifier as other than the first character.
5923     * <p>
5924     * A character may be part of a Java identifier if and only if any
5925     * of the following are true:
5926     * <ul>
5927     * <li>  it is a letter
5928     * <li>  it is a currency symbol (such as {@code '$'})
5929     * <li>  it is a connecting punctuation character (such as {@code '_'})
5930     * <li>  it is a digit
5931     * <li>  it is a numeric letter (such as a Roman numeral character)
5932     * <li>  it is a combining mark
5933     * <li>  it is a non-spacing mark
5934     * <li> {@code isIdentifierIgnorable} returns
5935     * {@code true} for the character.
5936     * </ul>
5937     *
5938     * @param   ch the character to be tested.
5939     * @return  {@code true} if the character may be part of a
5940     *          Java identifier; {@code false} otherwise.
5941     * @see     Character#isJavaLetter(char)
5942     * @see     Character#isJavaIdentifierStart(char)
5943     * @see     Character#isJavaIdentifierPart(char)
5944     * @see     Character#isLetter(char)
5945     * @see     Character#isLetterOrDigit(char)
5946     * @see     Character#isUnicodeIdentifierPart(char)
5947     * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5949     * @deprecated Replaced by isJavaIdentifierPart(char).
5950     */
5951    @Deprecated
5952    public static boolean isJavaLetterOrDigit(char ch) {
5953        return isJavaIdentifierPart(ch);
5954    }
5955
5956    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5958     * <p>
5959     * A character is considered to be alphabetic if its general category type,
5960     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5961     * the following:
5962     * <ul>
5963     * <li> <code>UPPERCASE_LETTER</code>
5964     * <li> <code>LOWERCASE_LETTER</code>
5965     * <li> <code>TITLECASE_LETTER</code>
5966     * <li> <code>MODIFIER_LETTER</code>
5967     * <li> <code>OTHER_LETTER</code>
5968     * <li> <code>LETTER_NUMBER</code>
5969     * </ul>
5970     * or it has contributory property Other_Alphabetic as defined by the
5971     * Unicode Standard.
5972     *
5973     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
5976     * @since   1.7
5977     */
5978    public static boolean isAlphabetic(int codePoint) {
5979        return isAlphabeticImpl(codePoint);
5980    }
5981
5982    @FastNative
5983    static native boolean isAlphabeticImpl(int codePoint);
5984
5985
5986    /**
5987     * Determines if the specified character (Unicode code point) is a CJKV
5988     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5989     * the Unicode Standard.
5990     *
5991     * @param   codePoint the character (Unicode code point) to be tested.
5992     * @return  <code>true</code> if the character is a Unicode ideograph
5993     *          character, <code>false</code> otherwise.
5994     * @since   1.7
5995     */
5996    public static boolean isIdeographic(int codePoint) {
5997        return isIdeographicImpl(codePoint);
5998    }
5999    @FastNative
6000    static native boolean isIdeographicImpl(int codePoint);
6001
6002    /**
6003     * Determines if the specified character is
6004     * permissible as the first character in a Java identifier.
6005     * <p>
6006     * A character may start a Java identifier if and only if
6007     * one of the following conditions is true:
6008     * <ul>
6009     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6010     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6011     * <li> {@code ch} is a currency symbol (such as {@code '$'})
6012     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6013     * </ul>
6014     *
6015     * <p><b>Note:</b> This method cannot handle <a
6016     * href="#supplementary"> supplementary characters</a>. To support
6017     * all Unicode characters, including supplementary characters, use
6018     * the {@link #isJavaIdentifierStart(int)} method.
6019     *
6020     * @param   ch the character to be tested.
6021     * @return  {@code true} if the character may start a Java identifier;
6022     *          {@code false} otherwise.
6023     * @see     Character#isJavaIdentifierPart(char)
6024     * @see     Character#isLetter(char)
6025     * @see     Character#isUnicodeIdentifierStart(char)
6026     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6027     * @since   1.1
6028     */
6029    public static boolean isJavaIdentifierStart(char ch) {
6030        return isJavaIdentifierStart((int)ch);
6031    }
6032
6033    /**
6034     * Determines if the character (Unicode code point) is
6035     * permissible as the first character in a Java identifier.
6036     * <p>
6037     * A character may start a Java identifier if and only if
6038     * one of the following conditions is true:
6039     * <ul>
6040     * <li> {@link #isLetter(int) isLetter(codePoint)}
6041     *      returns {@code true}
6042     * <li> {@link #getType(int) getType(codePoint)}
6043     *      returns {@code LETTER_NUMBER}
6044     * <li> the referenced character is a currency symbol (such as {@code '$'})
6045     * <li> the referenced character is a connecting punctuation character
6046     *      (such as {@code '_'}).
6047     * </ul>
6048     *
6049     * @param   codePoint the character (Unicode code point) to be tested.
6050     * @return  {@code true} if the character may start a Java identifier;
6051     *          {@code false} otherwise.
6052     * @see     Character#isJavaIdentifierPart(int)
6053     * @see     Character#isLetter(int)
6054     * @see     Character#isUnicodeIdentifierStart(int)
6055     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6056     * @since   1.5
6057     */
6058    public static boolean isJavaIdentifierStart(int codePoint) {
6059        // Use precomputed bitmasks to optimize the ASCII range.
6060        if (codePoint < 64) {
6061            return (codePoint == '$'); // There's only one character in this range.
6062        } else if (codePoint < 128) {
6063            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6064        }
6065        return ((1 << getType(codePoint))
6066                & ((1 << UPPERCASE_LETTER)
6067                   | (1 << LOWERCASE_LETTER)
6068                   | (1  << TITLECASE_LETTER)
6069                   | (1  << MODIFIER_LETTER)
6070                   | (1  << OTHER_LETTER)
6071                   | (1  << CURRENCY_SYMBOL)
6072                   | (1  << CONNECTOR_PUNCTUATION)
6073                   | (1  << LETTER_NUMBER))) != 0;
6074    }
6075
6076    /**
6077     * Determines if the specified character may be part of a Java
6078     * identifier as other than the first character.
6079     * <p>
6080     * A character may be part of a Java identifier if any of the following
6081     * are true:
6082     * <ul>
6083     * <li>  it is a letter
6084     * <li>  it is a currency symbol (such as {@code '$'})
6085     * <li>  it is a connecting punctuation character (such as {@code '_'})
6086     * <li>  it is a digit
6087     * <li>  it is a numeric letter (such as a Roman numeral character)
6088     * <li>  it is a combining mark
6089     * <li>  it is a non-spacing mark
6090     * <li> {@code isIdentifierIgnorable} returns
6091     * {@code true} for the character
6092     * </ul>
6093     *
6094     * <p><b>Note:</b> This method cannot handle <a
6095     * href="#supplementary"> supplementary characters</a>. To support
6096     * all Unicode characters, including supplementary characters, use
6097     * the {@link #isJavaIdentifierPart(int)} method.
6098     *
6099     * @param   ch      the character to be tested.
6100     * @return {@code true} if the character may be part of a
6101     *          Java identifier; {@code false} otherwise.
6102     * @see     Character#isIdentifierIgnorable(char)
6103     * @see     Character#isJavaIdentifierStart(char)
6104     * @see     Character#isLetterOrDigit(char)
6105     * @see     Character#isUnicodeIdentifierPart(char)
6106     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6107     * @since   1.1
6108     */
6109    public static boolean isJavaIdentifierPart(char ch) {
6110        return isJavaIdentifierPart((int)ch);
6111    }
6112
6113    /**
6114     * Determines if the character (Unicode code point) may be part of a Java
6115     * identifier as other than the first character.
6116     * <p>
6117     * A character may be part of a Java identifier if any of the following
6118     * are true:
6119     * <ul>
6120     * <li>  it is a letter
6121     * <li>  it is a currency symbol (such as {@code '$'})
6122     * <li>  it is a connecting punctuation character (such as {@code '_'})
6123     * <li>  it is a digit
6124     * <li>  it is a numeric letter (such as a Roman numeral character)
6125     * <li>  it is a combining mark
6126     * <li>  it is a non-spacing mark
6127     * <li> {@link #isIdentifierIgnorable(int)
6128     * isIdentifierIgnorable(codePoint)} returns {@code true} for
6129     * the character
6130     * </ul>
6131     *
6132     * @param   codePoint the character (Unicode code point) to be tested.
6133     * @return {@code true} if the character may be part of a
6134     *          Java identifier; {@code false} otherwise.
6135     * @see     Character#isIdentifierIgnorable(int)
6136     * @see     Character#isJavaIdentifierStart(int)
6137     * @see     Character#isLetterOrDigit(int)
6138     * @see     Character#isUnicodeIdentifierPart(int)
6139     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6140     * @since   1.5
6141     */
6142    public static boolean isJavaIdentifierPart(int codePoint) {
6143        // Use precomputed bitmasks to optimize the ASCII range.
6144        if (codePoint < 64) {
6145            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
6146        } else if (codePoint < 128) {
6147            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6148        }
6149        return ((1 << getType(codePoint))
6150                & ((1 << UPPERCASE_LETTER)
6151                   | (1 << LOWERCASE_LETTER)
6152                   | (1 << TITLECASE_LETTER)
6153                   | (1 << MODIFIER_LETTER)
6154                   | (1 << OTHER_LETTER)
6155                   | (1 << CURRENCY_SYMBOL)
6156                   | (1 << CONNECTOR_PUNCTUATION)
6157                   | (1 << DECIMAL_DIGIT_NUMBER)
6158                   | (1 << LETTER_NUMBER)
6159                   | (1 << FORMAT)
6160                   | (1 << COMBINING_SPACING_MARK)
6161                   | (1 << NON_SPACING_MARK))) != 0
6162                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
6163                || (codePoint >= 0x7f && codePoint <= 0x9f);
6164    }
6165
6166    /**
6167     * Determines if the specified character is permissible as the
6168     * first character in a Unicode identifier.
6169     * <p>
6170     * A character may start a Unicode identifier if and only if
6171     * one of the following conditions is true:
6172     * <ul>
6173     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6174     * <li> {@link #getType(char) getType(ch)} returns
6175     *      {@code LETTER_NUMBER}.
6176     * </ul>
6177     *
6178     * <p><b>Note:</b> This method cannot handle <a
6179     * href="#supplementary"> supplementary characters</a>. To support
6180     * all Unicode characters, including supplementary characters, use
6181     * the {@link #isUnicodeIdentifierStart(int)} method.
6182     *
6183     * @param   ch      the character to be tested.
6184     * @return  {@code true} if the character may start a Unicode
6185     *          identifier; {@code false} otherwise.
6186     * @see     Character#isJavaIdentifierStart(char)
6187     * @see     Character#isLetter(char)
6188     * @see     Character#isUnicodeIdentifierPart(char)
6189     * @since   1.1
6190     */
6191    public static boolean isUnicodeIdentifierStart(char ch) {
6192        return isUnicodeIdentifierStart((int)ch);
6193    }
6194
6195    /**
6196     * Determines if the specified character (Unicode code point) is permissible as the
6197     * first character in a Unicode identifier.
6198     * <p>
6199     * A character may start a Unicode identifier if and only if
6200     * one of the following conditions is true:
6201     * <ul>
6202     * <li> {@link #isLetter(int) isLetter(codePoint)}
6203     *      returns {@code true}
6204     * <li> {@link #getType(int) getType(codePoint)}
6205     *      returns {@code LETTER_NUMBER}.
6206     * </ul>
6207     * @param   codePoint the character (Unicode code point) to be tested.
6208     * @return  {@code true} if the character may start a Unicode
6209     *          identifier; {@code false} otherwise.
6210     * @see     Character#isJavaIdentifierStart(int)
6211     * @see     Character#isLetter(int)
6212     * @see     Character#isUnicodeIdentifierPart(int)
6213     * @since   1.5
6214     */
6215    public static boolean isUnicodeIdentifierStart(int codePoint) {
6216        return isUnicodeIdentifierStartImpl(codePoint);
6217    }
6218
6219    @FastNative
6220    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6221
6222    /**
6223     * Determines if the specified character may be part of a Unicode
6224     * identifier as other than the first character.
6225     * <p>
6226     * A character may be part of a Unicode identifier if and only if
6227     * one of the following statements is true:
6228     * <ul>
6229     * <li>  it is a letter
6230     * <li>  it is a connecting punctuation character (such as {@code '_'})
6231     * <li>  it is a digit
6232     * <li>  it is a numeric letter (such as a Roman numeral character)
6233     * <li>  it is a combining mark
6234     * <li>  it is a non-spacing mark
6235     * <li> {@code isIdentifierIgnorable} returns
6236     * {@code true} for this character.
6237     * </ul>
6238     *
6239     * <p><b>Note:</b> This method cannot handle <a
6240     * href="#supplementary"> supplementary characters</a>. To support
6241     * all Unicode characters, including supplementary characters, use
6242     * the {@link #isUnicodeIdentifierPart(int)} method.
6243     *
6244     * @param   ch      the character to be tested.
6245     * @return  {@code true} if the character may be part of a
6246     *          Unicode identifier; {@code false} otherwise.
6247     * @see     Character#isIdentifierIgnorable(char)
6248     * @see     Character#isJavaIdentifierPart(char)
6249     * @see     Character#isLetterOrDigit(char)
6250     * @see     Character#isUnicodeIdentifierStart(char)
6251     * @since   1.1
6252     */
6253    public static boolean isUnicodeIdentifierPart(char ch) {
6254        return isUnicodeIdentifierPart((int)ch);
6255    }
6256
6257    /**
6258     * Determines if the specified character (Unicode code point) may be part of a Unicode
6259     * identifier as other than the first character.
6260     * <p>
6261     * A character may be part of a Unicode identifier if and only if
6262     * one of the following statements is true:
6263     * <ul>
6264     * <li>  it is a letter
6265     * <li>  it is a connecting punctuation character (such as {@code '_'})
6266     * <li>  it is a digit
6267     * <li>  it is a numeric letter (such as a Roman numeral character)
6268     * <li>  it is a combining mark
6269     * <li>  it is a non-spacing mark
6270     * <li> {@code isIdentifierIgnorable} returns
6271     * {@code true} for this character.
6272     * </ul>
6273     * @param   codePoint the character (Unicode code point) to be tested.
6274     * @return  {@code true} if the character may be part of a
6275     *          Unicode identifier; {@code false} otherwise.
6276     * @see     Character#isIdentifierIgnorable(int)
6277     * @see     Character#isJavaIdentifierPart(int)
6278     * @see     Character#isLetterOrDigit(int)
6279     * @see     Character#isUnicodeIdentifierStart(int)
6280     * @since   1.5
6281     */
6282    public static boolean isUnicodeIdentifierPart(int codePoint) {
6283        return isUnicodeIdentifierPartImpl(codePoint);
6284    }
6285
6286    @FastNative
6287    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6288
6289    /**
6290     * Determines if the specified character should be regarded as
6291     * an ignorable character in a Java identifier or a Unicode identifier.
6292     * <p>
6293     * The following Unicode characters are ignorable in a Java identifier
6294     * or a Unicode identifier:
6295     * <ul>
6296     * <li>ISO control characters that are not whitespace
6297     * <ul>
6298     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6299     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6300     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6301     * </ul>
6302     *
6303     * <li>all characters that have the {@code FORMAT} general
6304     * category value
6305     * </ul>
6306     *
6307     * <p><b>Note:</b> This method cannot handle <a
6308     * href="#supplementary"> supplementary characters</a>. To support
6309     * all Unicode characters, including supplementary characters, use
6310     * the {@link #isIdentifierIgnorable(int)} method.
6311     *
6312     * @param   ch      the character to be tested.
6313     * @return  {@code true} if the character is an ignorable control
6314     *          character that may be part of a Java or Unicode identifier;
6315     *           {@code false} otherwise.
6316     * @see     Character#isJavaIdentifierPart(char)
6317     * @see     Character#isUnicodeIdentifierPart(char)
6318     * @since   1.1
6319     */
6320    public static boolean isIdentifierIgnorable(char ch) {
6321        return isIdentifierIgnorable((int)ch);
6322    }
6323
6324    /**
6325     * Determines if the specified character (Unicode code point) should be regarded as
6326     * an ignorable character in a Java identifier or a Unicode identifier.
6327     * <p>
6328     * The following Unicode characters are ignorable in a Java identifier
6329     * or a Unicode identifier:
6330     * <ul>
6331     * <li>ISO control characters that are not whitespace
6332     * <ul>
6333     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6334     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6335     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6336     * </ul>
6337     *
6338     * <li>all characters that have the {@code FORMAT} general
6339     * category value
6340     * </ul>
6341     *
6342     * @param   codePoint the character (Unicode code point) to be tested.
6343     * @return  {@code true} if the character is an ignorable control
6344     *          character that may be part of a Java or Unicode identifier;
6345     *          {@code false} otherwise.
6346     * @see     Character#isJavaIdentifierPart(int)
6347     * @see     Character#isUnicodeIdentifierPart(int)
6348     * @since   1.5
6349     */
6350    public static boolean isIdentifierIgnorable(int codePoint) {
6351        return isIdentifierIgnorableImpl(codePoint);
6352    }
6353
6354    @FastNative
6355    static native boolean isIdentifierIgnorableImpl(int codePoint);
6356
6357    /**
6358     * Converts the character argument to lowercase using case
6359     * mapping information from the UnicodeData file.
6360     * <p>
6361     * Note that
6362     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6363     * does not always return {@code true} for some ranges of
6364     * characters, particularly those that are symbols or ideographs.
6365     *
6366     * <p>In general, {@link String#toLowerCase()} should be used to map
6367     * characters to lowercase. {@code String} case mapping methods
6368     * have several benefits over {@code Character} case mapping methods.
6369     * {@code String} case mapping methods can perform locale-sensitive
6370     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6371     * the {@code Character} case mapping methods cannot.
6372     *
6373     * <p><b>Note:</b> This method cannot handle <a
6374     * href="#supplementary"> supplementary characters</a>. To support
6375     * all Unicode characters, including supplementary characters, use
6376     * the {@link #toLowerCase(int)} method.
6377     *
6378     * @param   ch   the character to be converted.
6379     * @return  the lowercase equivalent of the character, if any;
6380     *          otherwise, the character itself.
6381     * @see     Character#isLowerCase(char)
6382     * @see     String#toLowerCase()
6383     */
6384    public static char toLowerCase(char ch) {
6385        return (char)toLowerCase((int)ch);
6386    }
6387
6388    /**
6389     * Converts the character (Unicode code point) argument to
6390     * lowercase using case mapping information from the UnicodeData
6391     * file.
6392     *
6393     * <p> Note that
6394     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6395     * does not always return {@code true} for some ranges of
6396     * characters, particularly those that are symbols or ideographs.
6397     *
6398     * <p>In general, {@link String#toLowerCase()} should be used to map
6399     * characters to lowercase. {@code String} case mapping methods
6400     * have several benefits over {@code Character} case mapping methods.
6401     * {@code String} case mapping methods can perform locale-sensitive
6402     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6403     * the {@code Character} case mapping methods cannot.
6404     *
6405     * @param   codePoint   the character (Unicode code point) to be converted.
6406     * @return  the lowercase equivalent of the character (Unicode code
6407     *          point), if any; otherwise, the character itself.
6408     * @see     Character#isLowerCase(int)
6409     * @see     String#toLowerCase()
6410     *
6411     * @since   1.5
6412     */
6413    public static int toLowerCase(int codePoint) {
6414        if (codePoint >= 'A' && codePoint <= 'Z') {
6415            return codePoint + ('a' - 'A');
6416        }
6417
6418        // All ASCII codepoints except the ones above remain unchanged.
6419        if (codePoint < 0x80) {
6420            return codePoint;
6421        }
6422
6423        return toLowerCaseImpl(codePoint);
6424    }
6425
6426    @FastNative
6427    static native int toLowerCaseImpl(int codePoint);
6428
6429    /**
6430     * Converts the character argument to uppercase using case mapping
6431     * information from the UnicodeData file.
6432     * <p>
6433     * Note that
6434     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6435     * does not always return {@code true} for some ranges of
6436     * characters, particularly those that are symbols or ideographs.
6437     *
6438     * <p>In general, {@link String#toUpperCase()} should be used to map
6439     * characters to uppercase. {@code String} case mapping methods
6440     * have several benefits over {@code Character} case mapping methods.
6441     * {@code String} case mapping methods can perform locale-sensitive
6442     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6443     * the {@code Character} case mapping methods cannot.
6444     *
6445     * <p><b>Note:</b> This method cannot handle <a
6446     * href="#supplementary"> supplementary characters</a>. To support
6447     * all Unicode characters, including supplementary characters, use
6448     * the {@link #toUpperCase(int)} method.
6449     *
6450     * @param   ch   the character to be converted.
6451     * @return  the uppercase equivalent of the character, if any;
6452     *          otherwise, the character itself.
6453     * @see     Character#isUpperCase(char)
6454     * @see     String#toUpperCase()
6455     */
6456    public static char toUpperCase(char ch) {
6457        return (char)toUpperCase((int)ch);
6458    }
6459
6460    /**
6461     * Converts the character (Unicode code point) argument to
6462     * uppercase using case mapping information from the UnicodeData
6463     * file.
6464     *
6465     * <p>Note that
6466     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6467     * does not always return {@code true} for some ranges of
6468     * characters, particularly those that are symbols or ideographs.
6469     *
6470     * <p>In general, {@link String#toUpperCase()} should be used to map
6471     * characters to uppercase. {@code String} case mapping methods
6472     * have several benefits over {@code Character} case mapping methods.
6473     * {@code String} case mapping methods can perform locale-sensitive
6474     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6475     * the {@code Character} case mapping methods cannot.
6476     *
6477     * @param   codePoint   the character (Unicode code point) to be converted.
6478     * @return  the uppercase equivalent of the character, if any;
6479     *          otherwise, the character itself.
6480     * @see     Character#isUpperCase(int)
6481     * @see     String#toUpperCase()
6482     *
6483     * @since   1.5
6484     */
6485    public static int toUpperCase(int codePoint) {
6486        if (codePoint >= 'a' && codePoint <= 'z') {
6487            return codePoint - ('a' - 'A');
6488        }
6489
6490        // All ASCII codepoints except the ones above remain unchanged.
6491        if (codePoint < 0x80) {
6492            return codePoint;
6493        }
6494
6495        return toUpperCaseImpl(codePoint);
6496    }
6497
6498    @FastNative
6499    static native int toUpperCaseImpl(int codePoint);
6500
6501    /**
6502     * Converts the character argument to titlecase using case mapping
6503     * information from the UnicodeData file. If a character has no
6504     * explicit titlecase mapping and is not itself a titlecase char
6505     * according to UnicodeData, then the uppercase mapping is
6506     * returned as an equivalent titlecase mapping. If the
6507     * {@code char} argument is already a titlecase
6508     * {@code char}, the same {@code char} value will be
6509     * returned.
6510     * <p>
6511     * Note that
6512     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6513     * does not always return {@code true} for some ranges of
6514     * characters.
6515     *
6516     * <p><b>Note:</b> This method cannot handle <a
6517     * href="#supplementary"> supplementary characters</a>. To support
6518     * all Unicode characters, including supplementary characters, use
6519     * the {@link #toTitleCase(int)} method.
6520     *
6521     * @param   ch   the character to be converted.
6522     * @return  the titlecase equivalent of the character, if any;
6523     *          otherwise, the character itself.
6524     * @see     Character#isTitleCase(char)
6525     * @see     Character#toLowerCase(char)
6526     * @see     Character#toUpperCase(char)
6527     * @since   1.0.2
6528     */
6529    public static char toTitleCase(char ch) {
6530        return (char)toTitleCase((int)ch);
6531    }
6532
6533    /**
6534     * Converts the character (Unicode code point) argument to titlecase using case mapping
6535     * information from the UnicodeData file. If a character has no
6536     * explicit titlecase mapping and is not itself a titlecase char
6537     * according to UnicodeData, then the uppercase mapping is
6538     * returned as an equivalent titlecase mapping. If the
6539     * character argument is already a titlecase
6540     * character, the same character value will be
6541     * returned.
6542     *
6543     * <p>Note that
6544     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6545     * does not always return {@code true} for some ranges of
6546     * characters.
6547     *
6548     * @param   codePoint   the character (Unicode code point) to be converted.
6549     * @return  the titlecase equivalent of the character, if any;
6550     *          otherwise, the character itself.
6551     * @see     Character#isTitleCase(int)
6552     * @see     Character#toLowerCase(int)
6553     * @see     Character#toUpperCase(int)
6554     * @since   1.5
6555     */
6556    public static int toTitleCase(int codePoint) {
6557        return toTitleCaseImpl(codePoint);
6558    }
6559
6560    @FastNative
6561    static native int toTitleCaseImpl(int codePoint);
6562
6563    /**
6564     * Returns the numeric value of the character {@code ch} in the
6565     * specified radix.
6566     * <p>
6567     * If the radix is not in the range {@code MIN_RADIX} &le;
6568     * {@code radix} &le; {@code MAX_RADIX} or if the
6569     * value of {@code ch} is not a valid digit in the specified
6570     * radix, {@code -1} is returned. A character is a valid digit
6571     * if at least one of the following is true:
6572     * <ul>
6573     * <li>The method {@code isDigit} is {@code true} of the character
6574     *     and the Unicode decimal digit value of the character (or its
6575     *     single-character decomposition) is less than the specified radix.
6576     *     In this case the decimal digit value is returned.
6577     * <li>The character is one of the uppercase Latin letters
6578     *     {@code 'A'} through {@code 'Z'} and its code is less than
6579     *     {@code radix + 'A' - 10}.
6580     *     In this case, {@code ch - 'A' + 10}
6581     *     is returned.
6582     * <li>The character is one of the lowercase Latin letters
6583     *     {@code 'a'} through {@code 'z'} and its code is less than
6584     *     {@code radix + 'a' - 10}.
6585     *     In this case, {@code ch - 'a' + 10}
6586     *     is returned.
6587     * <li>The character is one of the fullwidth uppercase Latin letters A
6588     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6589     *     and its code is less than
6590     *     {@code radix + '\u005CuFF21' - 10}.
6591     *     In this case, {@code ch - '\u005CuFF21' + 10}
6592     *     is returned.
6593     * <li>The character is one of the fullwidth lowercase Latin letters a
6594     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6595     *     and its code is less than
6596     *     {@code radix + '\u005CuFF41' - 10}.
6597     *     In this case, {@code ch - '\u005CuFF41' + 10}
6598     *     is returned.
6599     * </ul>
6600     *
6601     * <p><b>Note:</b> This method cannot handle <a
6602     * href="#supplementary"> supplementary characters</a>. To support
6603     * all Unicode characters, including supplementary characters, use
6604     * the {@link #digit(int, int)} method.
6605     *
6606     * @param   ch      the character to be converted.
6607     * @param   radix   the radix.
6608     * @return  the numeric value represented by the character in the
6609     *          specified radix.
6610     * @see     Character#forDigit(int, int)
6611     * @see     Character#isDigit(char)
6612     */
6613    public static int digit(char ch, int radix) {
6614        return digit((int)ch, radix);
6615    }
6616
6617    /**
6618     * Returns the numeric value of the specified character (Unicode
6619     * code point) in the specified radix.
6620     *
6621     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6622     * {@code radix} &le; {@code MAX_RADIX} or if the
6623     * character is not a valid digit in the specified
6624     * radix, {@code -1} is returned. A character is a valid digit
6625     * if at least one of the following is true:
6626     * <ul>
6627     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6628     *     and the Unicode decimal digit value of the character (or its
6629     *     single-character decomposition) is less than the specified radix.
6630     *     In this case the decimal digit value is returned.
6631     * <li>The character is one of the uppercase Latin letters
6632     *     {@code 'A'} through {@code 'Z'} and its code is less than
6633     *     {@code radix + 'A' - 10}.
6634     *     In this case, {@code codePoint - 'A' + 10}
6635     *     is returned.
6636     * <li>The character is one of the lowercase Latin letters
6637     *     {@code 'a'} through {@code 'z'} and its code is less than
6638     *     {@code radix + 'a' - 10}.
6639     *     In this case, {@code codePoint - 'a' + 10}
6640     *     is returned.
6641     * <li>The character is one of the fullwidth uppercase Latin letters A
6642     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6643     *     and its code is less than
6644     *     {@code radix + '\u005CuFF21' - 10}.
6645     *     In this case,
6646     *     {@code codePoint - '\u005CuFF21' + 10}
6647     *     is returned.
6648     * <li>The character is one of the fullwidth lowercase Latin letters a
6649     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6650     *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6652     *     In this case,
6653     *     {@code codePoint - '\u005CuFF41' + 10}
6654     *     is returned.
6655     * </ul>
6656     *
6657     * @param   codePoint the character (Unicode code point) to be converted.
6658     * @param   radix   the radix.
6659     * @return  the numeric value represented by the character in the
6660     *          specified radix.
6661     * @see     Character#forDigit(int, int)
6662     * @see     Character#isDigit(int)
6663     * @since   1.5
6664     */
6665    public static int digit(int codePoint, int radix) {
6666        if (radix < MIN_RADIX || radix > MAX_RADIX) {
6667            return -1;
6668        }
6669        if (codePoint < 128) {
6670            // Optimized for ASCII
6671            int result = -1;
6672            if ('0' <= codePoint && codePoint <= '9') {
6673                result = codePoint - '0';
6674            } else if ('a' <= codePoint && codePoint <= 'z') {
6675                result = 10 + (codePoint - 'a');
6676            } else if ('A' <= codePoint && codePoint <= 'Z') {
6677                result = 10 + (codePoint - 'A');
6678            }
6679            return result < radix ? result : -1;
6680        }
6681        return digitImpl(codePoint, radix);
6682    }
6683
6684    @FastNative
6685    native static int digitImpl(int codePoint, int radix);
6686
6687    /**
6688     * Returns the {@code int} value that the specified Unicode
6689     * character represents. For example, the character
6690     * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6691     * an int with a value of 50.
6692     * <p>
6693     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6694     * {@code '\u005Cu005A'}), lowercase
6695     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6696     * full width variant ({@code '\u005CuFF21'} through
6697     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6698     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6699     * through 35. This is independent of the Unicode specification,
6700     * which does not assign numeric values to these {@code char}
6701     * values.
6702     * <p>
6703     * If the character does not have a numeric value, then -1 is returned.
6704     * If the character has a numeric value that cannot be represented as a
6705     * nonnegative integer (for example, a fractional value), then -2
6706     * is returned.
6707     *
6708     * <p><b>Note:</b> This method cannot handle <a
6709     * href="#supplementary"> supplementary characters</a>. To support
6710     * all Unicode characters, including supplementary characters, use
6711     * the {@link #getNumericValue(int)} method.
6712     *
6713     * @param   ch      the character to be converted.
6714     * @return  the numeric value of the character, as a nonnegative {@code int}
6715     *           value; -2 if the character has a numeric value that is not a
6716     *          nonnegative integer; -1 if the character has no numeric value.
6717     * @see     Character#forDigit(int, int)
6718     * @see     Character#isDigit(char)
6719     * @since   1.1
6720     */
6721    public static int getNumericValue(char ch) {
6722        return getNumericValue((int)ch);
6723    }
6724
6725    /**
6726     * Returns the {@code int} value that the specified
6727     * character (Unicode code point) represents. For example, the character
6728     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6729     * an {@code int} with a value of 50.
6730     * <p>
6731     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6732     * {@code '\u005Cu005A'}), lowercase
6733     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6734     * full width variant ({@code '\u005CuFF21'} through
6735     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6736     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6737     * through 35. This is independent of the Unicode specification,
6738     * which does not assign numeric values to these {@code char}
6739     * values.
6740     * <p>
6741     * If the character does not have a numeric value, then -1 is returned.
6742     * If the character has a numeric value that cannot be represented as a
6743     * nonnegative integer (for example, a fractional value), then -2
6744     * is returned.
6745     *
6746     * @param   codePoint the character (Unicode code point) to be converted.
6747     * @return  the numeric value of the character, as a nonnegative {@code int}
6748     *          value; -2 if the character has a numeric value that is not a
6749     *          nonnegative integer; -1 if the character has no numeric value.
6750     * @see     Character#forDigit(int, int)
6751     * @see     Character#isDigit(int)
6752     * @since   1.5
6753     */
6754    public static int getNumericValue(int codePoint) {
6755        // This is both an optimization and papers over differences between Java and ICU.
6756        if (codePoint < 128) {
6757            if (codePoint >= '0' && codePoint <= '9') {
6758                return codePoint - '0';
6759            }
6760            if (codePoint >= 'a' && codePoint <= 'z') {
6761                return codePoint - ('a' - 10);
6762            }
6763            if (codePoint >= 'A' && codePoint <= 'Z') {
6764                return codePoint - ('A' - 10);
6765            }
6766            return -1;
6767        }
6768        // Full-width uppercase A-Z.
6769        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6770            return codePoint - 0xff17;
6771        }
6772        // Full-width lowercase a-z.
6773        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6774            return codePoint - 0xff37;
6775        }
6776        return getNumericValueImpl(codePoint);
6777    }
6778
6779    @FastNative
6780    native static int getNumericValueImpl(int codePoint);
6781
6782    /**
6783     * Determines if the specified character is ISO-LATIN-1 white space.
6784     * This method returns {@code true} for the following five
6785     * characters only:
6786     * <table summary="truechars">
6787     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6788     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6789     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6790     *     <td>{@code NEW LINE}</td></tr>
6791     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6792     *     <td>{@code FORM FEED}</td></tr>
6793     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6794     *     <td>{@code CARRIAGE RETURN}</td></tr>
6795     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6796     *     <td>{@code SPACE}</td></tr>
6797     * </table>
6798     *
6799     * @param      ch   the character to be tested.
6800     * @return     {@code true} if the character is ISO-LATIN-1 white
6801     *             space; {@code false} otherwise.
6802     * @see        Character#isSpaceChar(char)
6803     * @see        Character#isWhitespace(char)
6804     * @deprecated Replaced by isWhitespace(char).
6805     */
6806    @Deprecated
6807    public static boolean isSpace(char ch) {
6808        return (ch <= 0x0020) &&
6809            (((((1L << 0x0009) |
6810            (1L << 0x000A) |
6811            (1L << 0x000C) |
6812            (1L << 0x000D) |
6813            (1L << 0x0020)) >> ch) & 1L) != 0);
6814    }
6815
6816
6817    /**
6818     * Determines if the specified character is a Unicode space character.
6819     * A character is considered to be a space character if and only if
6820     * it is specified to be a space character by the Unicode Standard. This
6821     * method returns true if the character's general category type is any of
6822     * the following:
6823     * <ul>
6824     * <li> {@code SPACE_SEPARATOR}
6825     * <li> {@code LINE_SEPARATOR}
6826     * <li> {@code PARAGRAPH_SEPARATOR}
6827     * </ul>
6828     *
6829     * <p><b>Note:</b> This method cannot handle <a
6830     * href="#supplementary"> supplementary characters</a>. To support
6831     * all Unicode characters, including supplementary characters, use
6832     * the {@link #isSpaceChar(int)} method.
6833     *
6834     * @param   ch      the character to be tested.
6835     * @return  {@code true} if the character is a space character;
6836     *          {@code false} otherwise.
6837     * @see     Character#isWhitespace(char)
6838     * @since   1.1
6839     */
6840    public static boolean isSpaceChar(char ch) {
6841        return isSpaceChar((int)ch);
6842    }
6843
6844    /**
6845     * Determines if the specified character (Unicode code point) is a
6846     * Unicode space character.  A character is considered to be a
6847     * space character if and only if it is specified to be a space
6848     * character by the Unicode Standard. This method returns true if
6849     * the character's general category type is any of the following:
6850     *
6851     * <ul>
6852     * <li> {@link #SPACE_SEPARATOR}
6853     * <li> {@link #LINE_SEPARATOR}
6854     * <li> {@link #PARAGRAPH_SEPARATOR}
6855     * </ul>
6856     *
6857     * @param   codePoint the character (Unicode code point) to be tested.
6858     * @return  {@code true} if the character is a space character;
6859     *          {@code false} otherwise.
6860     * @see     Character#isWhitespace(int)
6861     * @since   1.5
6862     */
6863    public static boolean isSpaceChar(int codePoint) {
6864        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6865        // SPACE or NO-BREAK SPACE?
6866        if (codePoint == 0x20 || codePoint == 0xa0) {
6867            return true;
6868        }
6869        if (codePoint < 0x1000) {
6870            return false;
6871        }
6872        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6873        if (codePoint == 0x1680 || codePoint == 0x180e) {
6874            return true;
6875        }
6876        if (codePoint < 0x2000) {
6877            return false;
6878        }
6879        if (codePoint <= 0xffff) {
6880            // Other whitespace from General Punctuation...
6881            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6882                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6883        }
6884        // Let icu4c worry about non-BMP code points.
6885        return isSpaceCharImpl(codePoint);
6886    }
6887
6888    @FastNative
6889    static native boolean isSpaceCharImpl(int codePoint);
6890
6891    /**
6892     * Determines if the specified character is white space according to Java.
6893     * A character is a Java whitespace character if and only if it satisfies
6894     * one of the following criteria:
6895     * <ul>
6896     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6897     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6898     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6899     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6900     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6901     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6902     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6903     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6904     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6905     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6906     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6907     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6908     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6909     * </ul>
6910     *
6911     * <p><b>Note:</b> This method cannot handle <a
6912     * href="#supplementary"> supplementary characters</a>. To support
6913     * all Unicode characters, including supplementary characters, use
6914     * the {@link #isWhitespace(int)} method.
6915     *
6916     * @param   ch the character to be tested.
6917     * @return  {@code true} if the character is a Java whitespace
6918     *          character; {@code false} otherwise.
6919     * @see     Character#isSpaceChar(char)
6920     * @since   1.1
6921     */
6922    public static boolean isWhitespace(char ch) {
6923        return isWhitespace((int)ch);
6924    }
6925
6926    /**
6927     * Determines if the specified character (Unicode code point) is
6928     * white space according to Java.  A character is a Java
6929     * whitespace character if and only if it satisfies one of the
6930     * following criteria:
6931     * <ul>
6932     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6933     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6934     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6935     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6936     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6937     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6938     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6939     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6940     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6941     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6942     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6943     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6944     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6945     * </ul>
     *
6948     * @param   codePoint the character (Unicode code point) to be tested.
6949     * @return  {@code true} if the character is a Java whitespace
6950     *          character; {@code false} otherwise.
6951     * @see     Character#isSpaceChar(int)
6952     * @since   1.5
6953     */
6954    public static boolean isWhitespace(int codePoint) {
6955        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6956        // Any ASCII whitespace character?
6957        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6958            return true;
6959        }
6960        if (codePoint < 0x1000) {
6961            return false;
6962        }
6963        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6964        if (codePoint == 0x1680 || codePoint == 0x180e) {
6965            return true;
6966        }
6967        if (codePoint < 0x2000) {
6968            return false;
6969        }
6970        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6971        if (codePoint == 0x2007 || codePoint == 0x202f) {
6972            return false;
6973        }
6974        if (codePoint <= 0xffff) {
6975            // Other whitespace from General Punctuation...
6976            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6977                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6978        }
6979        // Let icu4c worry about non-BMP code points.
6980        return isWhitespaceImpl(codePoint);
6981    }
6982
6983    @FastNative
6984    native static boolean isWhitespaceImpl(int codePoint);
6985
6986    /**
6987     * Determines if the specified character is an ISO control
6988     * character.  A character is considered to be an ISO control
6989     * character if its code is in the range {@code '\u005Cu0000'}
6990     * through {@code '\u005Cu001F'} or in the range
6991     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6992     *
6993     * <p><b>Note:</b> This method cannot handle <a
6994     * href="#supplementary"> supplementary characters</a>. To support
6995     * all Unicode characters, including supplementary characters, use
6996     * the {@link #isISOControl(int)} method.
6997     *
6998     * @param   ch      the character to be tested.
6999     * @return  {@code true} if the character is an ISO control character;
7000     *          {@code false} otherwise.
7001     *
7002     * @see     Character#isSpaceChar(char)
7003     * @see     Character#isWhitespace(char)
7004     * @since   1.1
7005     */
7006    public static boolean isISOControl(char ch) {
7007        return isISOControl((int)ch);
7008    }
7009
7010    /**
7011     * Determines if the referenced character (Unicode code point) is an ISO control
7012     * character.  A character is considered to be an ISO control
7013     * character if its code is in the range {@code '\u005Cu0000'}
7014     * through {@code '\u005Cu001F'} or in the range
7015     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
7016     *
7017     * @param   codePoint the character (Unicode code point) to be tested.
7018     * @return  {@code true} if the character is an ISO control character;
7019     *          {@code false} otherwise.
7020     * @see     Character#isSpaceChar(int)
7021     * @see     Character#isWhitespace(int)
7022     * @since   1.5
7023     */
7024    public static boolean isISOControl(int codePoint) {
7025        // Optimized form of:
7026        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
7027        //     (codePoint >= 0x7F && codePoint <= 0x9F);
7028        return codePoint <= 0x9F &&
7029            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
7030    }
7031
7032    /**
7033     * Returns a value indicating a character's general category.
7034     *
7035     * <p><b>Note:</b> This method cannot handle <a
7036     * href="#supplementary"> supplementary characters</a>. To support
7037     * all Unicode characters, including supplementary characters, use
7038     * the {@link #getType(int)} method.
7039     *
7040     * @param   ch      the character to be tested.
7041     * @return  a value of type {@code int} representing the
7042     *          character's general category.
7043     * @see     Character#COMBINING_SPACING_MARK
7044     * @see     Character#CONNECTOR_PUNCTUATION
7045     * @see     Character#CONTROL
7046     * @see     Character#CURRENCY_SYMBOL
7047     * @see     Character#DASH_PUNCTUATION
7048     * @see     Character#DECIMAL_DIGIT_NUMBER
7049     * @see     Character#ENCLOSING_MARK
7050     * @see     Character#END_PUNCTUATION
7051     * @see     Character#FINAL_QUOTE_PUNCTUATION
7052     * @see     Character#FORMAT
7053     * @see     Character#INITIAL_QUOTE_PUNCTUATION
7054     * @see     Character#LETTER_NUMBER
7055     * @see     Character#LINE_SEPARATOR
7056     * @see     Character#LOWERCASE_LETTER
7057     * @see     Character#MATH_SYMBOL
7058     * @see     Character#MODIFIER_LETTER
7059     * @see     Character#MODIFIER_SYMBOL
7060     * @see     Character#NON_SPACING_MARK
7061     * @see     Character#OTHER_LETTER
7062     * @see     Character#OTHER_NUMBER
7063     * @see     Character#OTHER_PUNCTUATION
7064     * @see     Character#OTHER_SYMBOL
7065     * @see     Character#PARAGRAPH_SEPARATOR
7066     * @see     Character#PRIVATE_USE
7067     * @see     Character#SPACE_SEPARATOR
7068     * @see     Character#START_PUNCTUATION
7069     * @see     Character#SURROGATE
7070     * @see     Character#TITLECASE_LETTER
7071     * @see     Character#UNASSIGNED
7072     * @see     Character#UPPERCASE_LETTER
7073     * @since   1.1
7074     */
7075    public static int getType(char ch) {
7076        return getType((int)ch);
7077    }
7078
7079    /**
7080     * Returns a value indicating a character's general category.
7081     *
7082     * @param   codePoint the character (Unicode code point) to be tested.
7083     * @return  a value of type {@code int} representing the
7084     *          character's general category.
7085     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
7086     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
7087     * @see     Character#CONTROL CONTROL
7088     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
7089     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
7090     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
7091     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
7092     * @see     Character#END_PUNCTUATION END_PUNCTUATION
7093     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
7094     * @see     Character#FORMAT FORMAT
7095     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
7096     * @see     Character#LETTER_NUMBER LETTER_NUMBER
7097     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
7098     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
7099     * @see     Character#MATH_SYMBOL MATH_SYMBOL
7100     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
7101     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
7102     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
7103     * @see     Character#OTHER_LETTER OTHER_LETTER
7104     * @see     Character#OTHER_NUMBER OTHER_NUMBER
7105     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
7106     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
7107     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
7108     * @see     Character#PRIVATE_USE PRIVATE_USE
7109     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
7110     * @see     Character#START_PUNCTUATION START_PUNCTUATION
7111     * @see     Character#SURROGATE SURROGATE
7112     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
7113     * @see     Character#UNASSIGNED UNASSIGNED
7114     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
7115     * @since   1.5
7116     */
7117    public static int getType(int codePoint) {
7118        int type = getTypeImpl(codePoint);
7119        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
7120        if (type <= Character.FORMAT) {
7121            return type;
7122        }
7123        return (type + 1);
7124    }
7125
7126    @FastNative
7127    static native int getTypeImpl(int codePoint);
7128
7129    /**
7130     * Determines the character representation for a specific digit in
7131     * the specified radix. If the value of {@code radix} is not a
7132     * valid radix, or the value of {@code digit} is not a valid
7133     * digit in the specified radix, the null character
7134     * ({@code '\u005Cu0000'}) is returned.
7135     * <p>
7136     * The {@code radix} argument is valid if it is greater than or
7137     * equal to {@code MIN_RADIX} and less than or equal to
7138     * {@code MAX_RADIX}. The {@code digit} argument is valid if
7139     * {@code 0 <= digit < radix}.
7140     * <p>
7141     * If the digit is less than 10, then
7142     * {@code '0' + digit} is returned. Otherwise, the value
7143     * {@code 'a' + digit - 10} is returned.
7144     *
7145     * @param   digit   the number to convert to a character.
7146     * @param   radix   the radix.
7147     * @return  the {@code char} representation of the specified digit
7148     *          in the specified radix.
7149     * @see     Character#MIN_RADIX
7150     * @see     Character#MAX_RADIX
7151     * @see     Character#digit(char, int)
7152     */
7153    public static char forDigit(int digit, int radix) {
7154        if ((digit >= radix) || (digit < 0)) {
7155            return '\0';
7156        }
7157        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
7158            return '\0';
7159        }
7160        if (digit < 10) {
7161            return (char)('0' + digit);
7162        }
7163        return (char)('a' - 10 + digit);
7164    }
7165
7166    /**
7167     * Returns the Unicode directionality property for the given
7168     * character.  Character directionality is used to calculate the
7169     * visual ordering of text. The directionality value of undefined
7170     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7171     *
7172     * <p><b>Note:</b> This method cannot handle <a
7173     * href="#supplementary"> supplementary characters</a>. To support
7174     * all Unicode characters, including supplementary characters, use
7175     * the {@link #getDirectionality(int)} method.
7176     *
7177     * @param  ch {@code char} for which the directionality property
7178     *            is requested.
7179     * @return the directionality property of the {@code char} value.
7180     *
7181     * @see Character#DIRECTIONALITY_UNDEFINED
7182     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7183     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7184     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7185     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7186     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7187     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7188     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7189     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7190     * @see Character#DIRECTIONALITY_NONSPACING_MARK
7191     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7192     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7193     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7194     * @see Character#DIRECTIONALITY_WHITESPACE
7195     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7196     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7197     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7198     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7199     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7200     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7201     * @since 1.4
7202     */
7203    public static byte getDirectionality(char ch) {
7204        return getDirectionality((int)ch);
7205    }
7206
7207    /**
7208     * Returns the Unicode directionality property for the given
7209     * character (Unicode code point).  Character directionality is
7210     * used to calculate the visual ordering of text. The
7211     * directionality value of undefined character is {@link
7212     * #DIRECTIONALITY_UNDEFINED}.
7213     *
7214     * @param   codePoint the character (Unicode code point) for which
7215     *          the directionality property is requested.
7216     * @return the directionality property of the character.
7217     *
7218     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7219     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7220     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7221     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7222     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7223     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7224     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7225     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7226     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7227     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7228     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7229     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7230     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7231     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7232     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7233     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7234     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7235     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7236     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7237     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7238     * @since    1.5
7239     */
7240    public static byte getDirectionality(int codePoint) {
7241        if (getType(codePoint) == Character.UNASSIGNED) {
7242            return Character.DIRECTIONALITY_UNDEFINED;
7243        }
7244
7245        byte directionality = getDirectionalityImpl(codePoint);
7246        if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7247            return DIRECTIONALITY[directionality];
7248        }
7249        return Character.DIRECTIONALITY_UNDEFINED;
7250    }
7251
    /**
     * Native lookup behind {@link #getDirectionality(int)}. The returned value
     * is not a {@code DIRECTIONALITY_*} constant itself: the caller uses it as
     * an index into the {@code DIRECTIONALITY} table and treats out-of-range
     * values as undefined.
     *
     * @param codePoint the character (Unicode code point) to look up.
     * @return the raw directionality value for {@code codePoint}.
     */
    @FastNative
    native static byte getDirectionalityImpl(int codePoint);
7254    /**
7255     * Determines whether the character is mirrored according to the
7256     * Unicode specification.  Mirrored characters should have their
7257     * glyphs horizontally mirrored when displayed in text that is
7258     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7259     * PARENTHESIS is semantically defined to be an <i>opening
7260     * parenthesis</i>.  This will appear as a "(" in text that is
7261     * left-to-right but as a ")" in text that is right-to-left.
7262     *
7263     * <p><b>Note:</b> This method cannot handle <a
7264     * href="#supplementary"> supplementary characters</a>. To support
7265     * all Unicode characters, including supplementary characters, use
7266     * the {@link #isMirrored(int)} method.
7267     *
7268     * @param  ch {@code char} for which the mirrored property is requested
7269     * @return {@code true} if the char is mirrored, {@code false}
7270     *         if the {@code char} is not mirrored or is not defined.
7271     * @since 1.4
7272     */
7273    public static boolean isMirrored(char ch) {
7274        return isMirrored((int)ch);
7275    }
7276
7277    /**
7278     * Determines whether the specified character (Unicode code point)
7279     * is mirrored according to the Unicode specification.  Mirrored
7280     * characters should have their glyphs horizontally mirrored when
7281     * displayed in text that is right-to-left.  For example,
7282     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7283     * defined to be an <i>opening parenthesis</i>.  This will appear
7284     * as a "(" in text that is left-to-right but as a ")" in text
7285     * that is right-to-left.
7286     *
7287     * @param   codePoint the character (Unicode code point) to be tested.
7288     * @return  {@code true} if the character is mirrored, {@code false}
7289     *          if the character is not mirrored or is not defined.
7290     * @since   1.5
7291     */
7292    public static boolean isMirrored(int codePoint) {
7293        return isMirroredImpl(codePoint);
7294    }
7295
    /**
     * Native implementation behind {@link #isMirrored(int)}, which returns
     * this method's result unchanged.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return the mirrored property reported for {@code codePoint}.
     */
    @FastNative
    native static boolean isMirroredImpl(int codePoint);
7298    /**
7299     * Compares two {@code Character} objects numerically.
7300     *
7301     * @param   anotherCharacter   the {@code Character} to be compared.
     *
7303     * @return  the value {@code 0} if the argument {@code Character}
7304     *          is equal to this {@code Character}; a value less than
7305     *          {@code 0} if this {@code Character} is numerically less
7306     *          than the {@code Character} argument; and a value greater than
7307     *          {@code 0} if this {@code Character} is numerically greater
7308     *          than the {@code Character} argument (unsigned comparison).
7309     *          Note that this is strictly a numerical comparison; it is not
7310     *          locale-dependent.
7311     * @since   1.2
7312     */
7313    public int compareTo(Character anotherCharacter) {
7314        return compare(this.value, anotherCharacter.value);
7315    }
7316
7317    /**
7318     * Compares two {@code char} values numerically.
7319     * The value returned is identical to what would be returned by:
7320     * <pre>
7321     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7322     * </pre>
7323     *
7324     * @param  x the first {@code char} to compare
7325     * @param  y the second {@code char} to compare
7326     * @return the value {@code 0} if {@code x == y};
7327     *         a value less than {@code 0} if {@code x < y}; and
7328     *         a value greater than {@code 0} if {@code x > y}
7329     * @since 1.7
7330     */
7331    public static int compare(char x, char y) {
7332        return x - y;
7333    }
7334
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7342
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, derived as {@code SIZE / Byte.SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7350
7351    /**
     * Returns the value obtained by reversing the order of the bytes in the
     * specified {@code char} value.
     *
     * @param ch The {@code char} of which to reverse the byte order.
     * @return the value obtained by reversing (or, equivalently, swapping)
     *     the bytes in the specified {@code char} value.
     * @since 1.5
7359     */
7360    public static char reverseBytes(char ch) {
7361        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7362    }
7363
7364    /**
7365     * Returns the Unicode name of the specified character
7366     * {@code codePoint}, or null if the code point is
7367     * {@link #UNASSIGNED unassigned}.
7368     * <p>
7369     * Note: if the specified character is not assigned a name by
7370     * the <i>UnicodeData</i> file (part of the Unicode Character
7371     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
7373     *
7374     * <blockquote>{@code
7375     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7376     *     + " "
7377     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7378     *
7379     * }</blockquote>
7380     *
7381     * @param  codePoint the character (Unicode code point)
7382     *
7383     * @return the Unicode name of the specified character, or null if
7384     *         the code point is unassigned.
7385     *
7386     * @exception IllegalArgumentException if the specified
7387     *            {@code codePoint} is not a valid Unicode
7388     *            code point.
7389     *
7390     * @since 1.7
7391     */
7392    public static String getName(int codePoint) {
7393        if (!isValidCodePoint(codePoint)) {
7394            throw new IllegalArgumentException();
7395        }
7396        String name = getNameImpl(codePoint);
7397        if (name != null)
7398            return name;
7399        if (getType(codePoint) == UNASSIGNED)
7400            return null;
7401        UnicodeBlock block = UnicodeBlock.of(codePoint);
7402        if (block != null)
7403            return block.toString().replace('_', ' ') + " "
7404                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7405        // should never come here
7406        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7407    }
7408
    /**
     * Native name lookup used by {@link #getName(int)}. When this returns
     * {@code null}, {@code getName(int)} either returns {@code null} (for
     * unassigned code points) or falls back to a name built from the
     * character's Unicode block and hexadecimal value.
     *
     * @param codePoint the character (Unicode code point) to look up.
     * @return the name found for {@code codePoint}, or {@code null}.
     */
    private static native String getNameImpl(int codePoint);
7410}
7411