Character.java revision 945919f928f7d7ec26b4fcb57c77253d67e3553c
1/*
2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import java.util.Arrays;
29import java.util.HashMap;
30import java.util.Locale;
31import java.util.Map;
32
33/**
34 * The {@code Character} class wraps a value of the primitive
35 * type {@code char} in an object. An object of type
36 * {@code Character} contains a single field whose type is
37 * {@code char}.
38 * <p>
39 * In addition, this class provides several methods for determining
40 * a character's category (lowercase letter, digit, etc.) and for converting
41 * characters from uppercase to lowercase and vice versa.
42 * <p>
43 * Character information is based on the Unicode Standard, version 6.2.0.
44 * <p>
45 * The methods and data of class {@code Character} are defined by
46 * the information in the <i>UnicodeData</i> file that is part of the
47 * Unicode Character Database maintained by the Unicode
48 * Consortium. This file specifies various properties including name
49 * and general category for every defined Unicode code point or
50 * character range.
51 * <p>
52 * The file and its description are available from the Unicode Consortium at:
53 * <ul>
54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
55 * </ul>
56 *
57 * <h3><a name="unicode">Unicode Character Representations</a></h3>
58 *
59 * <p>The {@code char} data type (and therefore the value that a
60 * {@code Character} object encapsulates) are based on the
61 * original Unicode specification, which defined characters as
62 * fixed-width 16-bit entities. The Unicode Standard has since been
63 * changed to allow for characters whose representation requires more
64 * than 16 bits.  The range of legal <em>code point</em>s is now
65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
66 * (Refer to the <a
67 * href="http://www.unicode.org/reports/tr27/#notation"><i>
68 * definition</i></a> of the U+<i>n</i> notation in the Unicode
69 * Standard.)
70 *
71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
73 * <a name="supplementary">Characters</a> whose code points are greater
74 * than U+FFFF are called <em>supplementary character</em>s.  The Java
75 * platform uses the UTF-16 representation in {@code char} arrays and
76 * in the {@code String} and {@code StringBuffer} classes. In
77 * this representation, supplementary characters are represented as a pair
78 * of {@code char} values, the first from the <em>high-surrogates</em>
79 * range, (&#92;uD800-&#92;uDBFF), the second from the
80 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
81 *
82 * <p>A {@code char} value, therefore, represents Basic
83 * Multilingual Plane (BMP) code points, including the surrogate
84 * code points, or code units of the UTF-16 encoding. An
85 * {@code int} value represents all Unicode code points,
86 * including supplementary code points. The lower (least significant)
87 * 21 bits of {@code int} are used to represent Unicode code
88 * points and the upper (most significant) 11 bits must be zero.
89 * Unless otherwise specified, the behavior with respect to
90 * supplementary characters and surrogate {@code char} values is
91 * as follows:
92 *
93 * <ul>
94 * <li>The methods that only accept a {@code char} value cannot support
95 * supplementary characters. They treat {@code char} values from the
96 * surrogate ranges as undefined characters. For example,
97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
98 * this specific value if followed by any low-surrogate value in a string
99 * would represent a letter.
100 *
101 * <li>The methods that accept an {@code int} value support all
102 * Unicode characters, including supplementary characters. For
103 * example, {@code Character.isLetter(0x2F81A)} returns
104 * {@code true} because the code point value represents a letter
105 * (a CJK ideograph).
106 * </ul>
107 *
108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
109 * used for character values in the range between U+0000 and U+10FFFF,
110 * and <em>Unicode code unit</em> is used for 16-bit
111 * {@code char} values that are code units of the <em>UTF-16</em>
112 * encoding. For more information on Unicode terminology, refer to the
113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
114 *
115 * @author  Lee Boynton
116 * @author  Guy Steele
117 * @author  Akira Tanaka
118 * @author  Martin Buchholz
119 * @author  Ulf Zibis
120 * @since   1.0
121 */
122public final
123class Character implements java.io.Serializable, Comparable<Character> {
124    /**
125     * The minimum radix available for conversion to and from strings.
126     * The constant value of this field is the smallest value permitted
127     * for the radix argument in radix-conversion methods such as the
128     * {@code digit} method, the {@code forDigit} method, and the
129     * {@code toString} method of class {@code Integer}.
130     *
131     * @see     Character#digit(char, int)
132     * @see     Character#forDigit(int, int)
133     * @see     Integer#toString(int, int)
134     * @see     Integer#valueOf(String)
135     */
136    public static final int MIN_RADIX = 2;
137
138    /**
139     * The maximum radix available for conversion to and from strings.
140     * The constant value of this field is the largest value permitted
141     * for the radix argument in radix-conversion methods such as the
142     * {@code digit} method, the {@code forDigit} method, and the
143     * {@code toString} method of class {@code Integer}.
144     *
145     * @see     Character#digit(char, int)
146     * @see     Character#forDigit(int, int)
147     * @see     Integer#toString(int, int)
148     * @see     Integer#valueOf(String)
149     */
150    public static final int MAX_RADIX = 36;
151
152    /**
153     * The constant value of this field is the smallest value of type
154     * {@code char}, {@code '\u005Cu0000'}.
155     *
156     * @since   1.0.2
157     */
158    public static final char MIN_VALUE = '\u0000';
159
160    /**
161     * The constant value of this field is the largest value of type
162     * {@code char}, {@code '\u005CuFFFF'}.
163     *
164     * @since   1.0.2
165     */
166    public static final char MAX_VALUE = '\uFFFF';
167
168    /**
169     * The {@code Class} instance representing the primitive type
170     * {@code char}.
171     *
172     * @since   1.1
173     */
    // The primitive class object is obtained as the component type of char[].
    // getComponentType() is typed as Class<?>, so the cast to Class<Character>
    // is unchecked; that is the only warning being suppressed here.
174    @SuppressWarnings("unchecked")
175    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
176
177    /*
178     * Normative general types
179     */
180
181    /*
182     * General character types
     *
     * Each constant below is a byte code for one Unicode general category;
     * the two-letter category abbreviation is given in the constant's own
     * comment. The codes run from 0 to 30 with a single gap: 17 is not
     * assigned to any constant in this group (FORMAT is 16, PRIVATE_USE
     * is 18).
183     */
184
185    /**
186     * General category "Cn" in the Unicode specification.
187     * @since   1.1
188     */
189    public static final byte UNASSIGNED = 0;
190
191    /**
192     * General category "Lu" in the Unicode specification.
193     * @since   1.1
194     */
195    public static final byte UPPERCASE_LETTER = 1;
196
197    /**
198     * General category "Ll" in the Unicode specification.
199     * @since   1.1
200     */
201    public static final byte LOWERCASE_LETTER = 2;
202
203    /**
204     * General category "Lt" in the Unicode specification.
205     * @since   1.1
206     */
207    public static final byte TITLECASE_LETTER = 3;
208
209    /**
210     * General category "Lm" in the Unicode specification.
211     * @since   1.1
212     */
213    public static final byte MODIFIER_LETTER = 4;
214
215    /**
216     * General category "Lo" in the Unicode specification.
217     * @since   1.1
218     */
219    public static final byte OTHER_LETTER = 5;
220
221    /**
222     * General category "Mn" in the Unicode specification.
223     * @since   1.1
224     */
225    public static final byte NON_SPACING_MARK = 6;
226
227    /**
228     * General category "Me" in the Unicode specification.
229     * @since   1.1
230     */
231    public static final byte ENCLOSING_MARK = 7;
232
233    /**
234     * General category "Mc" in the Unicode specification.
235     * @since   1.1
236     */
237    public static final byte COMBINING_SPACING_MARK = 8;
238
239    /**
240     * General category "Nd" in the Unicode specification.
241     * @since   1.1
242     */
243    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
244
245    /**
246     * General category "Nl" in the Unicode specification.
247     * @since   1.1
248     */
249    public static final byte LETTER_NUMBER = 10;
250
251    /**
252     * General category "No" in the Unicode specification.
253     * @since   1.1
254     */
255    public static final byte OTHER_NUMBER = 11;
256
257    /**
258     * General category "Zs" in the Unicode specification.
259     * @since   1.1
260     */
261    public static final byte SPACE_SEPARATOR = 12;
262
263    /**
264     * General category "Zl" in the Unicode specification.
265     * @since   1.1
266     */
267    public static final byte LINE_SEPARATOR = 13;
268
269    /**
270     * General category "Zp" in the Unicode specification.
271     * @since   1.1
272     */
273    public static final byte PARAGRAPH_SEPARATOR = 14;
274
275    /**
276     * General category "Cc" in the Unicode specification.
277     * @since   1.1
278     */
279    public static final byte CONTROL = 15;
280
281    /**
282     * General category "Cf" in the Unicode specification.
283     * @since   1.1
284     */
285    public static final byte FORMAT = 16;
286
287    /**
288     * General category "Co" in the Unicode specification.
289     * @since   1.1
290     */
    // Note: the numbering jumps from 16 (FORMAT) to 18 here; 17 is unused.
291    public static final byte PRIVATE_USE = 18;
292
293    /**
294     * General category "Cs" in the Unicode specification.
295     * @since   1.1
296     */
297    public static final byte SURROGATE = 19;
298
299    /**
300     * General category "Pd" in the Unicode specification.
301     * @since   1.1
302     */
303    public static final byte DASH_PUNCTUATION = 20;
304
305    /**
306     * General category "Ps" in the Unicode specification.
307     * @since   1.1
308     */
309    public static final byte START_PUNCTUATION = 21;
310
311    /**
312     * General category "Pe" in the Unicode specification.
313     * @since   1.1
314     */
315    public static final byte END_PUNCTUATION = 22;
316
317    /**
318     * General category "Pc" in the Unicode specification.
319     * @since   1.1
320     */
321    public static final byte CONNECTOR_PUNCTUATION = 23;
322
323    /**
324     * General category "Po" in the Unicode specification.
325     * @since   1.1
326     */
327    public static final byte OTHER_PUNCTUATION = 24;
328
329    /**
330     * General category "Sm" in the Unicode specification.
331     * @since   1.1
332     */
333    public static final byte MATH_SYMBOL = 25;
334
335    /**
336     * General category "Sc" in the Unicode specification.
337     * @since   1.1
338     */
339    public static final byte CURRENCY_SYMBOL = 26;
340
341    /**
342     * General category "Sk" in the Unicode specification.
343     * @since   1.1
344     */
345    public static final byte MODIFIER_SYMBOL = 27;
346
347    /**
348     * General category "So" in the Unicode specification.
349     * @since   1.1
350     */
351    public static final byte OTHER_SYMBOL = 28;
352
353    /**
354     * General category "Pi" in the Unicode specification.
355     * @since   1.4
356     */
357    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
358
359    /**
360     * General category "Pf" in the Unicode specification.
361     * @since   1.4
362     */
363    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
364
365    /**
366     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
     * As an {@code int}, the literal {@code 0xFFFFFFFF} has the value
     * {@code -1}, which no {@code char} (0 to 0xFFFF) can equal.
367     */
368    static final int ERROR = 0xFFFFFFFF;
369
370
371    /**
372     * Undefined bidirectional character type. Undefined {@code char}
373     * values have undefined directionality in the Unicode specification.
     * This is the only {@code DIRECTIONALITY_*} constant in this group with
     * a negative value; the others are numbered consecutively from 0 to 18.
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_UNDEFINED = -1;
377
378    /**
379     * Strong bidirectional character type "L" in the Unicode specification.
380     * @since 1.4
381     */
382    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
383
384    /**
385     * Strong bidirectional character type "R" in the Unicode specification.
386     * @since 1.4
387     */
388    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
389
390    /**
391     * Strong bidirectional character type "AL" in the Unicode specification.
392     * @since 1.4
393     */
394    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
395
396    /**
397     * Weak bidirectional character type "EN" in the Unicode specification.
398     * @since 1.4
399     */
400    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
401
402    /**
403     * Weak bidirectional character type "ES" in the Unicode specification.
404     * @since 1.4
405     */
406    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
407
408    /**
409     * Weak bidirectional character type "ET" in the Unicode specification.
410     * @since 1.4
411     */
412    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
413
414    /**
415     * Weak bidirectional character type "AN" in the Unicode specification.
416     * @since 1.4
417     */
418    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
419
420    /**
421     * Weak bidirectional character type "CS" in the Unicode specification.
422     * @since 1.4
423     */
424    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
425
426    /**
427     * Weak bidirectional character type "NSM" in the Unicode specification.
428     * @since 1.4
429     */
430    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
431
432    /**
433     * Weak bidirectional character type "BN" in the Unicode specification.
434     * @since 1.4
435     */
436    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
437
438    /**
439     * Neutral bidirectional character type "B" in the Unicode specification.
440     * @since 1.4
441     */
442    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
443
444    /**
445     * Neutral bidirectional character type "S" in the Unicode specification.
446     * @since 1.4
447     */
448    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
449
450    /**
451     * Neutral bidirectional character type "WS" in the Unicode specification.
452     * @since 1.4
453     */
454    public static final byte DIRECTIONALITY_WHITESPACE = 12;
455
456    /**
457     * Neutral bidirectional character type "ON" in the Unicode specification.
458     * @since 1.4
459     */
460    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
461
462    /**
463     * Strong bidirectional character type "LRE" in the Unicode specification.
464     * @since 1.4
465     */
466    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
467
468    /**
469     * Strong bidirectional character type "LRO" in the Unicode specification.
470     * @since 1.4
471     */
472    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
473
474    /**
475     * Strong bidirectional character type "RLE" in the Unicode specification.
476     * @since 1.4
477     */
478    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
479
480    /**
481     * Strong bidirectional character type "RLO" in the Unicode specification.
482     * @since 1.4
483     */
484    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
485
486    /**
487     * Weak bidirectional character type "PDF" in the Unicode specification.
488     * @since 1.4
489     */
490    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
491
492    /**
493     * The minimum value of a
494     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
495     * Unicode high-surrogate code unit</a>
496     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
497     * A high-surrogate is also known as a <i>leading-surrogate</i>.
498     *
499     * @since 1.5
500     */
501    public static final char MIN_HIGH_SURROGATE = '\uD800';
502
503    /**
504     * The maximum value of a
505     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
506     * Unicode high-surrogate code unit</a>
507     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
508     * A high-surrogate is also known as a <i>leading-surrogate</i>.
509     *
510     * @since 1.5
511     */
512    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
513
514    /**
515     * The minimum value of a
516     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
517     * Unicode low-surrogate code unit</a>
518     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
519     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
520     *
521     * @since 1.5
522     */
523    public static final char MIN_LOW_SURROGATE  = '\uDC00';
524
525    /**
526     * The maximum value of a
527     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
528     * Unicode low-surrogate code unit</a>
529     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
530     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
531     *
532     * @since 1.5
533     */
534    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
535
536    /**
537     * The minimum value of a Unicode surrogate code unit in the
538     * UTF-16 encoding, constant {@code '\u005CuD800'}.
     * Defined as {@link #MIN_HIGH_SURROGATE}, the lowest value of the
     * high-surrogate range.
539     *
540     * @since 1.5
541     */
542    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
543
544    /**
545     * The maximum value of a Unicode surrogate code unit in the
546     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * Defined as {@link #MAX_LOW_SURROGATE}, the highest value of the
     * low-surrogate range.
547     *
548     * @since 1.5
549     */
550    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
551
552    /**
553     * The minimum value of a
554     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
555     * Unicode supplementary code point</a>, constant {@code U+10000}.
556     *
557     * @since 1.5
558     */
559    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
560
561    /**
562     * The minimum value of a
563     * <a href="http://www.unicode.org/glossary/#code_point">
564     * Unicode code point</a>, constant {@code U+0000}.
565     *
566     * @since 1.5
567     */
568    public static final int MIN_CODE_POINT = 0x000000;
569
570    /**
571     * The maximum value of a
572     * <a href="http://www.unicode.org/glossary/#code_point">
573     * Unicode code point</a>, constant {@code U+10FFFF}.
574     *
575     * @since 1.5
576     */
577    public static final int MAX_CODE_POINT = 0X10FFFF;
578
    /*
     * Lookup table holding the 19 non-negative DIRECTIONALITY_* constants in
     * a fixed order that is NOT their numeric order: for example
     * DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (value 2) sits at index 13 and
     * DIRECTIONALITY_NONSPACING_MARK (value 8) at index 17. The array
     * therefore translates an index into the matching public constant.
     * DIRECTIONALITY_UNDEFINED (-1) has no entry.
     *
     * NOTE(review): the code that indexes this table is not visible in this
     * part of the file. Presumably the index is a directionality code
     * supplied by an external (native/ICU-style) lookup -- confirm against
     * the reader of this array before reordering any entry.
     */
579    private static final byte[] DIRECTIONALITY = new byte[] {
580            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
581            DIRECTIONALITY_EUROPEAN_NUMBER,
582            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
583            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
584            DIRECTIONALITY_ARABIC_NUMBER,
585            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
586            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
587            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
588            DIRECTIONALITY_OTHER_NEUTRALS,
589            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
590            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
591            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
592            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
593            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
594            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
595            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
596
597    /**
598     * Instances of this class represent particular subsets of the Unicode
599     * character set.  The only family of subsets defined in the
600     * {@code Character} class is {@link Character.UnicodeBlock}.
601     * Other portions of the Java API may define other subsets for their
602     * own purposes.
603     *
604     * @since 1.2
605     */
606    public static class Subset  {
607
608        private String name;
609
610        /**
611         * Constructs a new {@code Subset} instance.
612         *
613         * @param  name  The name of this subset
614         * @exception NullPointerException if name is {@code null}
615         */
616        protected Subset(String name) {
617            if (name == null) {
618                throw new NullPointerException("name");
619            }
620            this.name = name;
621        }
622
623        /**
624         * Compares two {@code Subset} objects for equality.
625         * This method returns {@code true} if and only if
626         * {@code this} and the argument refer to the same
627         * object; since this method is {@code final}, this
628         * guarantee holds for all subclasses.
629         */
630        public final boolean equals(Object obj) {
631            return (this == obj);
632        }
633
634        /**
635         * Returns the standard hash code as defined by the
636         * {@link Object#hashCode} method.  This method
637         * is {@code final} in order to ensure that the
638         * {@code equals} and {@code hashCode} methods will
639         * be consistent in all subclasses.
640         */
641        public final int hashCode() {
642            return super.hashCode();
643        }
644
645        /**
646         * Returns the name of this subset.
647         */
648        public final String toString() {
649            return name;
650        }
651    }
652
653    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
654    // for the latest specification of Unicode Blocks.
655
656    /**
657     * A family of character subsets representing the character blocks in the
658     * Unicode specification. Character blocks generally define characters
659     * used for a specific script or purpose. A character is contained by
660     * at most one Unicode block.
661     *
662     * @since 1.2
663     */
664    public static final class UnicodeBlock extends Subset {
665
666        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
667
668        /**
669         * Creates a UnicodeBlock with the given identifier name.
670         * This name must be the same as the block identifier.
671         */
672        private UnicodeBlock(String idName) {
673            this(idName, true);
674        }
675
676        private UnicodeBlock(String idName, boolean isMap) {
677            super(idName);
678            if (isMap) {
679                map.put(idName, this);
680            }
681        }
682
683        /**
684         * Creates a UnicodeBlock with the given identifier name and
685         * alias name.
686         */
687        private UnicodeBlock(String idName, String alias) {
688            this(idName);
689            map.put(alias, this);
690        }
691
692        /**
693         * Creates a UnicodeBlock with the given identifier name and
694         * alias names.
695         */
696        private UnicodeBlock(String idName, String... aliases) {
697            this(idName);
698            for (String alias : aliases)
699                map.put(alias, this);
700        }
701
702        /**
703         * Constant for the "Basic Latin" Unicode character block.
704         * @since 1.2
705         */
706        public static final UnicodeBlock  BASIC_LATIN =
707            new UnicodeBlock("BASIC_LATIN",
708                             "BASIC LATIN",
709                             "BASICLATIN");
710
711        /**
712         * Constant for the "Latin-1 Supplement" Unicode character block.
713         * @since 1.2
714         */
715        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
716            new UnicodeBlock("LATIN_1_SUPPLEMENT",
717                             "LATIN-1 SUPPLEMENT",
718                             "LATIN-1SUPPLEMENT");
719
720        /**
721         * Constant for the "Latin Extended-A" Unicode character block.
722         * @since 1.2
723         */
724        public static final UnicodeBlock LATIN_EXTENDED_A =
725            new UnicodeBlock("LATIN_EXTENDED_A",
726                             "LATIN EXTENDED-A",
727                             "LATINEXTENDED-A");
728
729        /**
730         * Constant for the "Latin Extended-B" Unicode character block.
731         * @since 1.2
732         */
733        public static final UnicodeBlock LATIN_EXTENDED_B =
734            new UnicodeBlock("LATIN_EXTENDED_B",
735                             "LATIN EXTENDED-B",
736                             "LATINEXTENDED-B");
737
738        /**
739         * Constant for the "IPA Extensions" Unicode character block.
740         * @since 1.2
741         */
742        public static final UnicodeBlock IPA_EXTENSIONS =
743            new UnicodeBlock("IPA_EXTENSIONS",
744                             "IPA EXTENSIONS",
745                             "IPAEXTENSIONS");
746
747        /**
748         * Constant for the "Spacing Modifier Letters" Unicode character block.
749         * @since 1.2
750         */
751        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
752            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
753                             "SPACING MODIFIER LETTERS",
754                             "SPACINGMODIFIERLETTERS");
755
756        /**
757         * Constant for the "Combining Diacritical Marks" Unicode character block.
758         * @since 1.2
759         */
760        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
761            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
762                             "COMBINING DIACRITICAL MARKS",
763                             "COMBININGDIACRITICALMARKS");
764
765        /**
766         * Constant for the "Greek and Coptic" Unicode character block.
767         * <p>
768         * This block was previously known as the "Greek" block.
769         *
770         * @since 1.2
771         */
772        public static final UnicodeBlock GREEK =
773            new UnicodeBlock("GREEK",
774                             "GREEK AND COPTIC",
775                             "GREEKANDCOPTIC");
776
777        /**
778         * Constant for the "Cyrillic" Unicode character block.
779         * @since 1.2
780         */
781        public static final UnicodeBlock CYRILLIC =
782            new UnicodeBlock("CYRILLIC");
783
784        /**
785         * Constant for the "Armenian" Unicode character block.
786         * @since 1.2
787         */
788        public static final UnicodeBlock ARMENIAN =
789            new UnicodeBlock("ARMENIAN");
790
791        /**
792         * Constant for the "Hebrew" Unicode character block.
793         * @since 1.2
794         */
795        public static final UnicodeBlock HEBREW =
796            new UnicodeBlock("HEBREW");
797
798        /**
799         * Constant for the "Arabic" Unicode character block.
800         * @since 1.2
801         */
802        public static final UnicodeBlock ARABIC =
803            new UnicodeBlock("ARABIC");
804
805        /**
806         * Constant for the "Devanagari" Unicode character block.
807         * @since 1.2
808         */
809        public static final UnicodeBlock DEVANAGARI =
810            new UnicodeBlock("DEVANAGARI");
811
812        /**
813         * Constant for the "Bengali" Unicode character block.
814         * @since 1.2
815         */
816        public static final UnicodeBlock BENGALI =
817            new UnicodeBlock("BENGALI");
818
819        /**
820         * Constant for the "Gurmukhi" Unicode character block.
821         * @since 1.2
822         */
823        public static final UnicodeBlock GURMUKHI =
824            new UnicodeBlock("GURMUKHI");
825
826        /**
827         * Constant for the "Gujarati" Unicode character block.
828         * @since 1.2
829         */
830        public static final UnicodeBlock GUJARATI =
831            new UnicodeBlock("GUJARATI");
832
833        /**
834         * Constant for the "Oriya" Unicode character block.
835         * @since 1.2
836         */
837        public static final UnicodeBlock ORIYA =
838            new UnicodeBlock("ORIYA");
839
840        /**
841         * Constant for the "Tamil" Unicode character block.
842         * @since 1.2
843         */
844        public static final UnicodeBlock TAMIL =
845            new UnicodeBlock("TAMIL");
846
847        /**
848         * Constant for the "Telugu" Unicode character block.
849         * @since 1.2
850         */
851        public static final UnicodeBlock TELUGU =
852            new UnicodeBlock("TELUGU");
853
854        /**
855         * Constant for the "Kannada" Unicode character block.
856         * @since 1.2
857         */
858        public static final UnicodeBlock KANNADA =
859            new UnicodeBlock("KANNADA");
860
861        /**
862         * Constant for the "Malayalam" Unicode character block.
863         * @since 1.2
864         */
865        public static final UnicodeBlock MALAYALAM =
866            new UnicodeBlock("MALAYALAM");
867
868        /**
869         * Constant for the "Thai" Unicode character block.
870         * @since 1.2
871         */
872        public static final UnicodeBlock THAI =
873            new UnicodeBlock("THAI");
874
875        /**
876         * Constant for the "Lao" Unicode character block.
877         * @since 1.2
878         */
879        public static final UnicodeBlock LAO =
880            new UnicodeBlock("LAO");
881
882        /**
883         * Constant for the "Tibetan" Unicode character block.
884         * @since 1.2
885         */
886        public static final UnicodeBlock TIBETAN =
887            new UnicodeBlock("TIBETAN");
888
889        /**
890         * Constant for the "Georgian" Unicode character block.
891         * @since 1.2
892         */
893        public static final UnicodeBlock GEORGIAN =
894            new UnicodeBlock("GEORGIAN");
895
896        /**
897         * Constant for the "Hangul Jamo" Unicode character block.
898         * @since 1.2
899         */
900        public static final UnicodeBlock HANGUL_JAMO =
901            new UnicodeBlock("HANGUL_JAMO",
902                             "HANGUL JAMO",
903                             "HANGULJAMO");
904
905        /**
906         * Constant for the "Latin Extended Additional" Unicode character block.
907         * @since 1.2
908         */
909        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
910            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
911                             "LATIN EXTENDED ADDITIONAL",
912                             "LATINEXTENDEDADDITIONAL");
913
914        /**
915         * Constant for the "Greek Extended" Unicode character block.
916         * @since 1.2
917         */
918        public static final UnicodeBlock GREEK_EXTENDED =
919            new UnicodeBlock("GREEK_EXTENDED",
920                             "GREEK EXTENDED",
921                             "GREEKEXTENDED");
922
923        /**
924         * Constant for the "General Punctuation" Unicode character block.
925         * @since 1.2
926         */
927        public static final UnicodeBlock GENERAL_PUNCTUATION =
928            new UnicodeBlock("GENERAL_PUNCTUATION",
929                             "GENERAL PUNCTUATION",
930                             "GENERALPUNCTUATION");
931
932        /**
933         * Constant for the "Superscripts and Subscripts" Unicode character
934         * block.
935         * @since 1.2
936         */
937        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
938            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
939                             "SUPERSCRIPTS AND SUBSCRIPTS",
940                             "SUPERSCRIPTSANDSUBSCRIPTS");
941
942        /**
943         * Constant for the "Currency Symbols" Unicode character block.
944         * @since 1.2
945         */
946        public static final UnicodeBlock CURRENCY_SYMBOLS =
947            new UnicodeBlock("CURRENCY_SYMBOLS",
948                             "CURRENCY SYMBOLS",
949                             "CURRENCYSYMBOLS");
950
951        /**
952         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
953         * character block.
954         * <p>This block was previously known as "Combining Marks for Symbols";
955         * the constant keeps that name and the constructor receives both names.
956         * @since 1.2
957         */
958        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
959            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
960                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
961                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
962                             "COMBINING MARKS FOR SYMBOLS",
963                             "COMBININGMARKSFORSYMBOLS");
964
965        /**
966         * Constant for the "Letterlike Symbols" Unicode character block.
967         * @since 1.2
968         */
969        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
970            new UnicodeBlock("LETTERLIKE_SYMBOLS",
971                             "LETTERLIKE SYMBOLS",
972                             "LETTERLIKESYMBOLS");
973
974        /**
975         * Constant for the "Number Forms" Unicode character block.
976         * @since 1.2
977         */
978        public static final UnicodeBlock NUMBER_FORMS =
979            new UnicodeBlock("NUMBER_FORMS",
980                             "NUMBER FORMS",
981                             "NUMBERFORMS");
982
983        /**
984         * Constant for the "Arrows" Unicode character block.
985         * @since 1.2
986         */
987        public static final UnicodeBlock ARROWS =
988            new UnicodeBlock("ARROWS");
989
990        /**
991         * Constant for the "Mathematical Operators" Unicode character block.
992         * @since 1.2
993         */
994        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
995            new UnicodeBlock("MATHEMATICAL_OPERATORS",
996                             "MATHEMATICAL OPERATORS",
997                             "MATHEMATICALOPERATORS");
998
999        /**
1000         * Constant for the "Miscellaneous Technical" Unicode character block.
1001         * @since 1.2
1002         */
1003        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1004            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1005                             "MISCELLANEOUS TECHNICAL",
1006                             "MISCELLANEOUSTECHNICAL");
1007
1008        /**
1009         * Constant for the "Control Pictures" Unicode character block.
1010         * @since 1.2
1011         */
1012        public static final UnicodeBlock CONTROL_PICTURES =
1013            new UnicodeBlock("CONTROL_PICTURES",
1014                             "CONTROL PICTURES",
1015                             "CONTROLPICTURES");
1016
1017        /**
1018         * Constant for the "Optical Character Recognition" Unicode character block.
1019         * @since 1.2
1020         */
1021        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1022            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1023                             "OPTICAL CHARACTER RECOGNITION",
1024                             "OPTICALCHARACTERRECOGNITION");
1025
1026        /**
1027         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1028         * @since 1.2
1029         */
1030        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1031            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1032                             "ENCLOSED ALPHANUMERICS",
1033                             "ENCLOSEDALPHANUMERICS");
1034
1035        /**
1036         * Constant for the "Box Drawing" Unicode character block.
1037         * @since 1.2
1038         */
1039        public static final UnicodeBlock BOX_DRAWING =
1040            new UnicodeBlock("BOX_DRAWING",
1041                             "BOX DRAWING",
1042                             "BOXDRAWING");
1043
1044        /**
1045         * Constant for the "Block Elements" Unicode character block.
1046         * @since 1.2
1047         */
1048        public static final UnicodeBlock BLOCK_ELEMENTS =
1049            new UnicodeBlock("BLOCK_ELEMENTS",
1050                             "BLOCK ELEMENTS",
1051                             "BLOCKELEMENTS");
1052
1053        /**
1054         * Constant for the "Geometric Shapes" Unicode character block.
1055         * @since 1.2
1056         */
1057        public static final UnicodeBlock GEOMETRIC_SHAPES =
1058            new UnicodeBlock("GEOMETRIC_SHAPES",
1059                             "GEOMETRIC SHAPES",
1060                             "GEOMETRICSHAPES");
1061
1062        /**
1063         * Constant for the "Miscellaneous Symbols" Unicode character block.
1064         * @since 1.2
1065         */
1066        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1067            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1068                             "MISCELLANEOUS SYMBOLS",
1069                             "MISCELLANEOUSSYMBOLS");
1070
1071        /**
1072         * Constant for the "Dingbats" Unicode character block.
1073         * @since 1.2
1074         */
1075        public static final UnicodeBlock DINGBATS =
1076            new UnicodeBlock("DINGBATS");
1077
1078        /**
1079         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1080         * @since 1.2
1081         */
1082        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1083            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1084                             "CJK SYMBOLS AND PUNCTUATION",
1085                             "CJKSYMBOLSANDPUNCTUATION");
1086
1087        /**
1088         * Constant for the "Hiragana" Unicode character block.
1089         * @since 1.2
1090         */
1091        public static final UnicodeBlock HIRAGANA =
1092            new UnicodeBlock("HIRAGANA");
1093
1094        /**
1095         * Constant for the "Katakana" Unicode character block.
1096         * @since 1.2
1097         */
1098        public static final UnicodeBlock KATAKANA =
1099            new UnicodeBlock("KATAKANA");
1100
1101        /**
1102         * Constant for the "Bopomofo" Unicode character block.
1103         * @since 1.2
1104         */
1105        public static final UnicodeBlock BOPOMOFO =
1106            new UnicodeBlock("BOPOMOFO");
1107
1108        /**
1109         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1110         * @since 1.2
1111         */
1112        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1113            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1114                             "HANGUL COMPATIBILITY JAMO",
1115                             "HANGULCOMPATIBILITYJAMO");
1116
1117        /**
1118         * Constant for the "Kanbun" Unicode character block.
1119         * @since 1.2
1120         */
1121        public static final UnicodeBlock KANBUN =
1122            new UnicodeBlock("KANBUN");
1123
1124        /**
1125         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1126         * @since 1.2
1127         */
1128        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1129            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1130                             "ENCLOSED CJK LETTERS AND MONTHS",
1131                             "ENCLOSEDCJKLETTERSANDMONTHS");
1132
1133        /**
1134         * Constant for the "CJK Compatibility" Unicode character block.
1135         * @since 1.2
1136         */
1137        public static final UnicodeBlock CJK_COMPATIBILITY =
1138            new UnicodeBlock("CJK_COMPATIBILITY",
1139                             "CJK COMPATIBILITY",
1140                             "CJKCOMPATIBILITY");
1141
1142        /**
1143         * Constant for the "CJK Unified Ideographs" Unicode character block.
1144         * @since 1.2
1145         */
1146        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1147            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1148                             "CJK UNIFIED IDEOGRAPHS",
1149                             "CJKUNIFIEDIDEOGRAPHS");
1150
1151        /**
1152         * Constant for the "Hangul Syllables" Unicode character block.
1153         * @since 1.2
1154         */
1155        public static final UnicodeBlock HANGUL_SYLLABLES =
1156            new UnicodeBlock("HANGUL_SYLLABLES",
1157                             "HANGUL SYLLABLES",
1158                             "HANGULSYLLABLES");
1159
1160        /**
1161         * Constant for the "Private Use Area" Unicode character block.
1162         * @since 1.2
1163         */
1164        public static final UnicodeBlock PRIVATE_USE_AREA =
1165            new UnicodeBlock("PRIVATE_USE_AREA",
1166                             "PRIVATE USE AREA",
1167                             "PRIVATEUSEAREA");
1168
1169        /**
1170         * Constant for the "CJK Compatibility Ideographs" Unicode character
1171         * block.
1172         * @since 1.2
1173         */
1174        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1175            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1176                             "CJK COMPATIBILITY IDEOGRAPHS",
1177                             "CJKCOMPATIBILITYIDEOGRAPHS");
1178
1179        /**
1180         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1181         * @since 1.2
1182         */
1183        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1184            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1185                             "ALPHABETIC PRESENTATION FORMS",
1186                             "ALPHABETICPRESENTATIONFORMS");
1187
1188        /**
1189         * Constant for the "Arabic Presentation Forms-A" Unicode character
1190         * block.
1191         * @since 1.2
1192         */
1193        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1194            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1195                             "ARABIC PRESENTATION FORMS-A",
1196                             "ARABICPRESENTATIONFORMS-A");
1197
1198        /**
1199         * Constant for the "Combining Half Marks" Unicode character block.
1200         * @since 1.2
1201         */
1202        public static final UnicodeBlock COMBINING_HALF_MARKS =
1203            new UnicodeBlock("COMBINING_HALF_MARKS",
1204                             "COMBINING HALF MARKS",
1205                             "COMBININGHALFMARKS");
1206
1207        /**
1208         * Constant for the "CJK Compatibility Forms" Unicode character block.
1209         * @since 1.2
1210         */
1211        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1212            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1213                             "CJK COMPATIBILITY FORMS",
1214                             "CJKCOMPATIBILITYFORMS");
1215
1216        /**
1217         * Constant for the "Small Form Variants" Unicode character block.
1218         * @since 1.2
1219         */
1220        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1221            new UnicodeBlock("SMALL_FORM_VARIANTS",
1222                             "SMALL FORM VARIANTS",
1223                             "SMALLFORMVARIANTS");
1224
1225        /**
1226         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1227         * @since 1.2
1228         */
1229        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1230            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1231                             "ARABIC PRESENTATION FORMS-B",
1232                             "ARABICPRESENTATIONFORMS-B");
1233
1234        /**
1235         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1236         * block.
1237         * @since 1.2
1238         */
1239        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1240            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1241                             "HALFWIDTH AND FULLWIDTH FORMS",
1242                             "HALFWIDTHANDFULLWIDTHFORMS");
1243
1244        /**
1245         * Constant for the "Specials" Unicode character block.
1246         * @since 1.2
1247         */
1248        public static final UnicodeBlock SPECIALS =
1249            new UnicodeBlock("SPECIALS");
1250
1251        /**
1252         * Constant for the former "Surrogates Area" character block.
1253         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1254         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1255         *             {@link #LOW_SURROGATES}, which match the block
1256         *             definitions of the Unicode Standard. {@link #of(char)}
1257         *             and {@link #of(int)} return those, not SURROGATES_AREA.
1258         */
1259        @Deprecated
1260        public static final UnicodeBlock SURROGATES_AREA =
1261            new UnicodeBlock("SURROGATES_AREA", false);
1262
1263        /**
1264         * Constant for the "Syriac" Unicode character block.
1265         * @since 1.4
1266         */
1267        public static final UnicodeBlock SYRIAC =
1268            new UnicodeBlock("SYRIAC");
1269
1270        /**
1271         * Constant for the "Thaana" Unicode character block.
1272         * @since 1.4
1273         */
1274        public static final UnicodeBlock THAANA =
1275            new UnicodeBlock("THAANA");
1276
1277        /**
1278         * Constant for the "Sinhala" Unicode character block.
1279         * @since 1.4
1280         */
1281        public static final UnicodeBlock SINHALA =
1282            new UnicodeBlock("SINHALA");
1283
1284        /**
1285         * Constant for the "Myanmar" Unicode character block.
1286         * @since 1.4
1287         */
1288        public static final UnicodeBlock MYANMAR =
1289            new UnicodeBlock("MYANMAR");
1290
1291        /**
1292         * Constant for the "Ethiopic" Unicode character block.
1293         * @since 1.4
1294         */
1295        public static final UnicodeBlock ETHIOPIC =
1296            new UnicodeBlock("ETHIOPIC");
1297
1298        /**
1299         * Constant for the "Cherokee" Unicode character block.
1300         * @since 1.4
1301         */
1302        public static final UnicodeBlock CHEROKEE =
1303            new UnicodeBlock("CHEROKEE");
1304
1305        /**
1306         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1307         * @since 1.4
1308         */
1309        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1310            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1311                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1312                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1313
1314        /**
1315         * Constant for the "Ogham" Unicode character block.
1316         * @since 1.4
1317         */
1318        public static final UnicodeBlock OGHAM =
1319            new UnicodeBlock("OGHAM");
1320
1321        /**
1322         * Constant for the "Runic" Unicode character block.
1323         * @since 1.4
1324         */
1325        public static final UnicodeBlock RUNIC =
1326            new UnicodeBlock("RUNIC");
1327
1328        /**
1329         * Constant for the "Khmer" Unicode character block.
1330         * @since 1.4
1331         */
1332        public static final UnicodeBlock KHMER =
1333            new UnicodeBlock("KHMER");
1334
1335        /**
1336         * Constant for the "Mongolian" Unicode character block.
1337         * @since 1.4
1338         */
1339        public static final UnicodeBlock MONGOLIAN =
1340            new UnicodeBlock("MONGOLIAN");
1341
1342        /**
1343         * Constant for the "Braille Patterns" Unicode character block.
1344         * @since 1.4
1345         */
1346        public static final UnicodeBlock BRAILLE_PATTERNS =
1347            new UnicodeBlock("BRAILLE_PATTERNS",
1348                             "BRAILLE PATTERNS",
1349                             "BRAILLEPATTERNS");
1350
1351        /**
1352         * Constant for the "CJK Radicals Supplement" Unicode character block.
1353         * @since 1.4
1354         */
1355        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1356            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1357                             "CJK RADICALS SUPPLEMENT",
1358                             "CJKRADICALSSUPPLEMENT");
1359
1360        /**
1361         * Constant for the "Kangxi Radicals" Unicode character block.
1362         * @since 1.4
1363         */
1364        public static final UnicodeBlock KANGXI_RADICALS =
1365            new UnicodeBlock("KANGXI_RADICALS",
1366                             "KANGXI RADICALS",
1367                             "KANGXIRADICALS");
1368
1369        /**
1370         * Constant for the "Ideographic Description Characters" Unicode character block.
1371         * @since 1.4
1372         */
1373        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1374            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1375                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1376                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1377
1378        /**
1379         * Constant for the "Bopomofo Extended" Unicode character block.
1380         * @since 1.4
1381         */
1382        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1383            new UnicodeBlock("BOPOMOFO_EXTENDED",
1384                             "BOPOMOFO EXTENDED",
1385                             "BOPOMOFOEXTENDED");
1386
1387        /**
1388         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1389         * @since 1.4
1390         */
1391        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1392            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1393                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1394                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1395
1396        /**
1397         * Constant for the "Yi Syllables" Unicode character block.
1398         * @since 1.4
1399         */
1400        public static final UnicodeBlock YI_SYLLABLES =
1401            new UnicodeBlock("YI_SYLLABLES",
1402                             "YI SYLLABLES",
1403                             "YISYLLABLES");
1404
1405        /**
1406         * Constant for the "Yi Radicals" Unicode character block.
1407         * @since 1.4
1408         */
1409        public static final UnicodeBlock YI_RADICALS =
1410            new UnicodeBlock("YI_RADICALS",
1411                             "YI RADICALS",
1412                             "YIRADICALS");
1413
1414        /**
1415         * Constant for the "Cyrillic Supplementary" Unicode character block.
             * <p>
             * The name "Cyrillic Supplement" is passed to the constructor as well.
1416         * @since 1.5
1417         */
1418        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1419            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1420                             "CYRILLIC SUPPLEMENTARY",
1421                             "CYRILLICSUPPLEMENTARY",
1422                             "CYRILLIC SUPPLEMENT",
1423                             "CYRILLICSUPPLEMENT");
1424
1425        /**
1426         * Constant for the "Tagalog" Unicode character block.
1427         * @since 1.5
1428         */
1429        public static final UnicodeBlock TAGALOG =
1430            new UnicodeBlock("TAGALOG");
1431
1432        /**
1433         * Constant for the "Hanunoo" Unicode character block.
1434         * @since 1.5
1435         */
1436        public static final UnicodeBlock HANUNOO =
1437            new UnicodeBlock("HANUNOO");
1438
1439        /**
1440         * Constant for the "Buhid" Unicode character block.
1441         * @since 1.5
1442         */
1443        public static final UnicodeBlock BUHID =
1444            new UnicodeBlock("BUHID");
1445
1446        /**
1447         * Constant for the "Tagbanwa" Unicode character block.
1448         * @since 1.5
1449         */
1450        public static final UnicodeBlock TAGBANWA =
1451            new UnicodeBlock("TAGBANWA");
1452
1453        /**
1454         * Constant for the "Limbu" Unicode character block.
1455         * @since 1.5
1456         */
1457        public static final UnicodeBlock LIMBU =
1458            new UnicodeBlock("LIMBU");
1459
1460        /**
1461         * Constant for the "Tai Le" Unicode character block.
1462         * @since 1.5
1463         */
1464        public static final UnicodeBlock TAI_LE =
1465            new UnicodeBlock("TAI_LE",
1466                             "TAI LE",
1467                             "TAILE");
1468
1469        /**
1470         * Constant for the "Khmer Symbols" Unicode character block.
1471         * @since 1.5
1472         */
1473        public static final UnicodeBlock KHMER_SYMBOLS =
1474            new UnicodeBlock("KHMER_SYMBOLS",
1475                             "KHMER SYMBOLS",
1476                             "KHMERSYMBOLS");
1477
1478        /**
1479         * Constant for the "Phonetic Extensions" Unicode character block.
1480         * @since 1.5
1481         */
1482        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1483            new UnicodeBlock("PHONETIC_EXTENSIONS",
1484                             "PHONETIC EXTENSIONS",
1485                             "PHONETICEXTENSIONS");
1486
1487        /**
1488         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1489         * @since 1.5
1490         */
1491        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1492            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1493                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1494                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1495
1496        /**
1497         * Constant for the "Supplemental Arrows-A" Unicode character block.
1498         * @since 1.5
1499         */
1500        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1501            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1502                             "SUPPLEMENTAL ARROWS-A",
1503                             "SUPPLEMENTALARROWS-A");
1504
1505        /**
1506         * Constant for the "Supplemental Arrows-B" Unicode character block.
1507         * @since 1.5
1508         */
1509        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1510            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1511                             "SUPPLEMENTAL ARROWS-B",
1512                             "SUPPLEMENTALARROWS-B");
1513
1514        /**
1515         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1516         * character block.
1517         * @since 1.5
1518         */
1519        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1520            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1521                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1522                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1523
1524        /**
1525         * Constant for the "Supplemental Mathematical Operators" Unicode
1526         * character block.
1527         * @since 1.5
1528         */
1529        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1530            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1531                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1532                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1533
1534        /**
1535         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1536         * block.
1537         * @since 1.5
1538         */
1539        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1540            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1541                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1542                             "MISCELLANEOUSSYMBOLSANDARROWS");
1543
1544        /**
1545         * Constant for the "Katakana Phonetic Extensions" Unicode character
1546         * block.
1547         * @since 1.5
1548         */
1549        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1550            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1551                             "KATAKANA PHONETIC EXTENSIONS",
1552                             "KATAKANAPHONETICEXTENSIONS");
1553
1554        /**
1555         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1556         * @since 1.5
1557         */
1558        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1559            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1560                             "YIJING HEXAGRAM SYMBOLS",
1561                             "YIJINGHEXAGRAMSYMBOLS");
1562
1563        /**
1564         * Constant for the "Variation Selectors" Unicode character block.
1565         * @since 1.5
1566         */
1567        public static final UnicodeBlock VARIATION_SELECTORS =
1568            new UnicodeBlock("VARIATION_SELECTORS",
1569                             "VARIATION SELECTORS",
1570                             "VARIATIONSELECTORS");
1571
1572        /**
1573         * Constant for the "Linear B Syllabary" Unicode character block.
1574         * @since 1.5
1575         */
1576        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1577            new UnicodeBlock("LINEAR_B_SYLLABARY",
1578                             "LINEAR B SYLLABARY",
1579                             "LINEARBSYLLABARY");
1580
1581        /**
1582         * Constant for the "Linear B Ideograms" Unicode character block.
1583         * @since 1.5
1584         */
1585        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1586            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1587                             "LINEAR B IDEOGRAMS",
1588                             "LINEARBIDEOGRAMS");
1589
1590        /**
1591         * Constant for the "Aegean Numbers" Unicode character block.
1592         * @since 1.5
1593         */
1594        public static final UnicodeBlock AEGEAN_NUMBERS =
1595            new UnicodeBlock("AEGEAN_NUMBERS",
1596                             "AEGEAN NUMBERS",
1597                             "AEGEANNUMBERS");
1598
1599        /**
1600         * Constant for the "Old Italic" Unicode character block.
1601         * @since 1.5
1602         */
1603        public static final UnicodeBlock OLD_ITALIC =
1604            new UnicodeBlock("OLD_ITALIC",
1605                             "OLD ITALIC",
1606                             "OLDITALIC");
1607
1608        /**
1609         * Constant for the "Gothic" Unicode character block.
1610         * @since 1.5
1611         */
1612        public static final UnicodeBlock GOTHIC =
1613            new UnicodeBlock("GOTHIC");
1614
1615        /**
1616         * Constant for the "Ugaritic" Unicode character block.
1617         * @since 1.5
1618         */
1619        public static final UnicodeBlock UGARITIC =
1620            new UnicodeBlock("UGARITIC");
1621
1622        /**
1623         * Constant for the "Deseret" Unicode character block.
1624         * @since 1.5
1625         */
1626        public static final UnicodeBlock DESERET =
1627            new UnicodeBlock("DESERET");
1628
1629        /**
1630         * Constant for the "Shavian" Unicode character block.
1631         * @since 1.5
1632         */
1633        public static final UnicodeBlock SHAVIAN =
1634            new UnicodeBlock("SHAVIAN");
1635
1636        /**
1637         * Constant for the "Osmanya" Unicode character block.
1638         * @since 1.5
1639         */
1640        public static final UnicodeBlock OSMANYA =
1641            new UnicodeBlock("OSMANYA");
1642
1643        /**
1644         * Constant for the "Cypriot Syllabary" Unicode character block.
1645         * @since 1.5
1646         */
1647        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1648            new UnicodeBlock("CYPRIOT_SYLLABARY",
1649                             "CYPRIOT SYLLABARY",
1650                             "CYPRIOTSYLLABARY");
1651
1652        /**
1653         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1654         * @since 1.5
1655         */
1656        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1657            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1658                             "BYZANTINE MUSICAL SYMBOLS",
1659                             "BYZANTINEMUSICALSYMBOLS");
1660
1661        /**
1662         * Constant for the "Musical Symbols" Unicode character block.
1663         * @since 1.5
1664         */
1665        public static final UnicodeBlock MUSICAL_SYMBOLS =
1666            new UnicodeBlock("MUSICAL_SYMBOLS",
1667                             "MUSICAL SYMBOLS",
1668                             "MUSICALSYMBOLS");
1669
1670        /**
1671         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1672         * @since 1.5
1673         */
1674        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1675            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1676                             "TAI XUAN JING SYMBOLS",
1677                             "TAIXUANJINGSYMBOLS");
1678
1679        /**
1680         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1681         * character block.
1682         * @since 1.5
1683         */
1684        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1685            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1686                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1687                             "MATHEMATICALALPHANUMERICSYMBOLS");
1688
1689        /**
1690         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1691         * character block.
1692         * @since 1.5
1693         */
1694        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1695            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1696                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1697                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1698
1699        /**
1700         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1701         * @since 1.5
1702         */
1703        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1704            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1705                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1706                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1707
1708        /**
1709         * Constant for the "Tags" Unicode character block.
1710         * @since 1.5
1711         */
1712        public static final UnicodeBlock TAGS =
1713            new UnicodeBlock("TAGS");
1714
1715        /**
1716         * Constant for the "Variation Selectors Supplement" Unicode character
1717         * block.
1718         * @since 1.5
1719         */
1720        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1721            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1722                             "VARIATION SELECTORS SUPPLEMENT",
1723                             "VARIATIONSELECTORSSUPPLEMENT");
1724
1725        /**
1726         * Constant for the "Supplementary Private Use Area-A" Unicode character
1727         * block.
1728         * @since 1.5
1729         */
1730        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1731            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1732                             "SUPPLEMENTARY PRIVATE USE AREA-A",
1733                             "SUPPLEMENTARYPRIVATEUSEAREA-A");
1734
1735        /**
1736         * Constant for the "Supplementary Private Use Area-B" Unicode character
1737         * block.
1738         * @since 1.5
1739         */
1740        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1741            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1742                             "SUPPLEMENTARY PRIVATE USE AREA-B",
1743                             "SUPPLEMENTARYPRIVATEUSEAREA-B");
1744
1745        /**
1746         * Constant for the "High Surrogates" Unicode character block.
1747         * This block represents codepoint values in the high surrogate
1748         * range: U+D800 through U+DB7F
1749         *
1750         * @since 1.5
1751         */
1752        public static final UnicodeBlock HIGH_SURROGATES =
1753            new UnicodeBlock("HIGH_SURROGATES",
1754                             "HIGH SURROGATES",
1755                             "HIGHSURROGATES");
1756
1757        /**
1758         * Constant for the "High Private Use Surrogates" Unicode character
1759         * block.
1760         * This block represents codepoint values in the private use high
1761         * surrogate range: U+DB80 through U+DBFF
1762         *
1763         * @since 1.5
1764         */
1765        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1766            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1767                             "HIGH PRIVATE USE SURROGATES",
1768                             "HIGHPRIVATEUSESURROGATES");
1769
1770        /**
1771         * Constant for the "Low Surrogates" Unicode character block.
1772         * This block represents codepoint values in the low surrogate
1773         * range: U+DC00 through U+DFFF
1774         *
1775         * @since 1.5
1776         */
1777        public static final UnicodeBlock LOW_SURROGATES =
1778            new UnicodeBlock("LOW_SURROGATES",
1779                             "LOW SURROGATES",
1780                             "LOWSURROGATES");
1781
1782        /**
1783         * Constant for the "Arabic Supplement" Unicode character block.
1784         * @since 1.7
1785         */
1786        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1787            new UnicodeBlock("ARABIC_SUPPLEMENT",
1788                             "ARABIC SUPPLEMENT",
1789                             "ARABICSUPPLEMENT");
1790
1791        /**
1792         * Constant for the "NKo" Unicode character block.
1793         * @since 1.7
1794         */
1795        public static final UnicodeBlock NKO =
1796            new UnicodeBlock("NKO");
1797
1798        /**
1799         * Constant for the "Samaritan" Unicode character block.
1800         * @since 1.7
1801         */
1802        public static final UnicodeBlock SAMARITAN =
1803            new UnicodeBlock("SAMARITAN");
1804
1805        /**
1806         * Constant for the "Mandaic" Unicode character block.
1807         * @since 1.7
1808         */
1809        public static final UnicodeBlock MANDAIC =
1810            new UnicodeBlock("MANDAIC");
1811
1812        /**
1813         * Constant for the "Ethiopic Supplement" Unicode character block.
1814         * @since 1.7
1815         */
1816        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1817            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1818                             "ETHIOPIC SUPPLEMENT",
1819                             "ETHIOPICSUPPLEMENT");
1820
1821        /**
1822         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1823         * Unicode character block.
1824         * @since 1.7
1825         */
1826        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1827            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1828                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1829                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1830
1831        /**
1832         * Constant for the "New Tai Lue" Unicode character block.
1833         * @since 1.7
1834         */
1835        public static final UnicodeBlock NEW_TAI_LUE =
1836            new UnicodeBlock("NEW_TAI_LUE",
1837                             "NEW TAI LUE",
1838                             "NEWTAILUE");
1839
1840        /**
1841         * Constant for the "Buginese" Unicode character block.
1842         * @since 1.7
1843         */
1844        public static final UnicodeBlock BUGINESE =
1845            new UnicodeBlock("BUGINESE");
1846
1847        /**
1848         * Constant for the "Tai Tham" Unicode character block.
1849         * @since 1.7
1850         */
1851        public static final UnicodeBlock TAI_THAM =
1852            new UnicodeBlock("TAI_THAM",
1853                             "TAI THAM",
1854                             "TAITHAM");
1855
1856        /**
1857         * Constant for the "Balinese" Unicode character block.
1858         * @since 1.7
1859         */
1860        public static final UnicodeBlock BALINESE =
1861            new UnicodeBlock("BALINESE");
1862
1863        /**
1864         * Constant for the "Sundanese" Unicode character block.
1865         * @since 1.7
1866         */
1867        public static final UnicodeBlock SUNDANESE =
1868            new UnicodeBlock("SUNDANESE");
1869
1870        /**
1871         * Constant for the "Batak" Unicode character block.
1872         * @since 1.7
1873         */
1874        public static final UnicodeBlock BATAK =
1875            new UnicodeBlock("BATAK");
1876
1877        /**
1878         * Constant for the "Lepcha" Unicode character block.
1879         * @since 1.7
1880         */
1881        public static final UnicodeBlock LEPCHA =
1882            new UnicodeBlock("LEPCHA");
1883
1884        /**
1885         * Constant for the "Ol Chiki" Unicode character block.
1886         * @since 1.7
1887         */
1888        public static final UnicodeBlock OL_CHIKI =
1889            new UnicodeBlock("OL_CHIKI",
1890                             "OL CHIKI",
1891                             "OLCHIKI");
1892
1893        /**
1894         * Constant for the "Vedic Extensions" Unicode character block.
1895         * @since 1.7
1896         */
1897        public static final UnicodeBlock VEDIC_EXTENSIONS =
1898            new UnicodeBlock("VEDIC_EXTENSIONS",
1899                             "VEDIC EXTENSIONS",
1900                             "VEDICEXTENSIONS");
1901
1902        /**
1903         * Constant for the "Phonetic Extensions Supplement" Unicode character
1904         * block.
1905         * @since 1.7
1906         */
1907        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1908            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1909                             "PHONETIC EXTENSIONS SUPPLEMENT",
1910                             "PHONETICEXTENSIONSSUPPLEMENT");
1911
1912        /**
1913         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1914         * character block.
1915         * @since 1.7
1916         */
1917        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1918            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1919                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1920                             "COMBININGDIACRITICALMARKSSUPPLEMENT");
1921
1922        /**
1923         * Constant for the "Glagolitic" Unicode character block.
1924         * @since 1.7
1925         */
1926        public static final UnicodeBlock GLAGOLITIC =
1927            new UnicodeBlock("GLAGOLITIC");
1928
1929        /**
1930         * Constant for the "Latin Extended-C" Unicode character block.
1931         * @since 1.7
1932         */
1933        public static final UnicodeBlock LATIN_EXTENDED_C =
1934            new UnicodeBlock("LATIN_EXTENDED_C",
1935                             "LATIN EXTENDED-C",
1936                             "LATINEXTENDED-C");
1937
1938        /**
1939         * Constant for the "Coptic" Unicode character block.
1940         * @since 1.7
1941         */
1942        public static final UnicodeBlock COPTIC =
1943            new UnicodeBlock("COPTIC");
1944
1945        /**
1946         * Constant for the "Georgian Supplement" Unicode character block.
1947         * @since 1.7
1948         */
1949        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1950            new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1951                             "GEORGIAN SUPPLEMENT",
1952                             "GEORGIANSUPPLEMENT");
1953
1954        /**
1955         * Constant for the "Tifinagh" Unicode character block.
1956         * @since 1.7
1957         */
1958        public static final UnicodeBlock TIFINAGH =
1959            new UnicodeBlock("TIFINAGH");
1960
1961        /**
1962         * Constant for the "Ethiopic Extended" Unicode character block.
1963         * @since 1.7
1964         */
1965        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1966            new UnicodeBlock("ETHIOPIC_EXTENDED",
1967                             "ETHIOPIC EXTENDED",
1968                             "ETHIOPICEXTENDED");
1969
1970        /**
1971         * Constant for the "Cyrillic Extended-A" Unicode character block.
1972         * @since 1.7
1973         */
1974        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1975            new UnicodeBlock("CYRILLIC_EXTENDED_A",
1976                             "CYRILLIC EXTENDED-A",
1977                             "CYRILLICEXTENDED-A");
1978
1979        /**
1980         * Constant for the "Supplemental Punctuation" Unicode character block.
1981         * @since 1.7
1982         */
1983        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1984            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1985                             "SUPPLEMENTAL PUNCTUATION",
1986                             "SUPPLEMENTALPUNCTUATION");
1987
1988        /**
1989         * Constant for the "CJK Strokes" Unicode character block.
1990         * @since 1.7
1991         */
1992        public static final UnicodeBlock CJK_STROKES =
1993            new UnicodeBlock("CJK_STROKES",
1994                             "CJK STROKES",
1995                             "CJKSTROKES");
1996
1997        /**
1998         * Constant for the "Lisu" Unicode character block.
1999         * @since 1.7
2000         */
2001        public static final UnicodeBlock LISU =
2002            new UnicodeBlock("LISU");
2003
2004        /**
2005         * Constant for the "Vai" Unicode character block.
2006         * @since 1.7
2007         */
2008        public static final UnicodeBlock VAI =
2009            new UnicodeBlock("VAI");
2010
2011        /**
2012         * Constant for the "Cyrillic Extended-B" Unicode character block.
2013         * @since 1.7
2014         */
2015        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2016            new UnicodeBlock("CYRILLIC_EXTENDED_B",
2017                             "CYRILLIC EXTENDED-B",
2018                             "CYRILLICEXTENDED-B");
2019
2020        /**
2021         * Constant for the "Bamum" Unicode character block.
2022         * @since 1.7
2023         */
2024        public static final UnicodeBlock BAMUM =
2025            new UnicodeBlock("BAMUM");
2026
2027        /**
2028         * Constant for the "Modifier Tone Letters" Unicode character block.
2029         * @since 1.7
2030         */
2031        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2032            new UnicodeBlock("MODIFIER_TONE_LETTERS",
2033                             "MODIFIER TONE LETTERS",
2034                             "MODIFIERTONELETTERS");
2035
2036        /**
2037         * Constant for the "Latin Extended-D" Unicode character block.
2038         * @since 1.7
2039         */
2040        public static final UnicodeBlock LATIN_EXTENDED_D =
2041            new UnicodeBlock("LATIN_EXTENDED_D",
2042                             "LATIN EXTENDED-D",
2043                             "LATINEXTENDED-D");
2044
2045        /**
2046         * Constant for the "Syloti Nagri" Unicode character block.
2047         * @since 1.7
2048         */
2049        public static final UnicodeBlock SYLOTI_NAGRI =
2050            new UnicodeBlock("SYLOTI_NAGRI",
2051                             "SYLOTI NAGRI",
2052                             "SYLOTINAGRI");
2053
2054        /**
2055         * Constant for the "Common Indic Number Forms" Unicode character block.
2056         * @since 1.7
2057         */
2058        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2059            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2060                             "COMMON INDIC NUMBER FORMS",
2061                             "COMMONINDICNUMBERFORMS");
2062
2063        /**
2064         * Constant for the "Phags-pa" Unicode character block.
2065         * @since 1.7
2066         */
2067        public static final UnicodeBlock PHAGS_PA =
2068            new UnicodeBlock("PHAGS_PA",
2069                             "PHAGS-PA");
2070
2071        /**
2072         * Constant for the "Saurashtra" Unicode character block.
2073         * @since 1.7
2074         */
2075        public static final UnicodeBlock SAURASHTRA =
2076            new UnicodeBlock("SAURASHTRA");
2077
2078        /**
2079         * Constant for the "Devanagari Extended" Unicode character block.
2080         * @since 1.7
2081         */
2082        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2083            new UnicodeBlock("DEVANAGARI_EXTENDED",
2084                             "DEVANAGARI EXTENDED",
2085                             "DEVANAGARIEXTENDED");
2086
2087        /**
2088         * Constant for the "Kayah Li" Unicode character block.
2089         * @since 1.7
2090         */
2091        public static final UnicodeBlock KAYAH_LI =
2092            new UnicodeBlock("KAYAH_LI",
2093                             "KAYAH LI",
2094                             "KAYAHLI");
2095
2096        /**
2097         * Constant for the "Rejang" Unicode character block.
2098         * @since 1.7
2099         */
2100        public static final UnicodeBlock REJANG =
2101            new UnicodeBlock("REJANG");
2102
2103        /**
2104         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2105         * @since 1.7
2106         */
2107        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2108            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2109                             "HANGUL JAMO EXTENDED-A",
2110                             "HANGULJAMOEXTENDED-A");
2111
2112        /**
2113         * Constant for the "Javanese" Unicode character block.
2114         * @since 1.7
2115         */
2116        public static final UnicodeBlock JAVANESE =
2117            new UnicodeBlock("JAVANESE");
2118
2119        /**
2120         * Constant for the "Cham" Unicode character block.
2121         * @since 1.7
2122         */
2123        public static final UnicodeBlock CHAM =
2124            new UnicodeBlock("CHAM");
2125
2126        /**
2127         * Constant for the "Myanmar Extended-A" Unicode character block.
2128         * @since 1.7
2129         */
2130        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2131            new UnicodeBlock("MYANMAR_EXTENDED_A",
2132                             "MYANMAR EXTENDED-A",
2133                             "MYANMAREXTENDED-A");
2134
2135        /**
2136         * Constant for the "Tai Viet" Unicode character block.
2137         * @since 1.7
2138         */
2139        public static final UnicodeBlock TAI_VIET =
2140            new UnicodeBlock("TAI_VIET",
2141                             "TAI VIET",
2142                             "TAIVIET");
2143
2144        /**
2145         * Constant for the "Ethiopic Extended-A" Unicode character block.
2146         * @since 1.7
2147         */
2148        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2149            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2150                             "ETHIOPIC EXTENDED-A",
2151                             "ETHIOPICEXTENDED-A");
2152
2153        /**
2154         * Constant for the "Meetei Mayek" Unicode character block.
2155         * @since 1.7
2156         */
2157        public static final UnicodeBlock MEETEI_MAYEK =
2158            new UnicodeBlock("MEETEI_MAYEK",
2159                             "MEETEI MAYEK",
2160                             "MEETEIMAYEK");
2161
2162        /**
2163         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2164         * @since 1.7
2165         */
2166        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2167            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2168                             "HANGUL JAMO EXTENDED-B",
2169                             "HANGULJAMOEXTENDED-B");
2170
2171        /**
2172         * Constant for the "Vertical Forms" Unicode character block.
2173         * @since 1.7
2174         */
2175        public static final UnicodeBlock VERTICAL_FORMS =
2176            new UnicodeBlock("VERTICAL_FORMS",
2177                             "VERTICAL FORMS",
2178                             "VERTICALFORMS");
2179
2180        /**
2181         * Constant for the "Ancient Greek Numbers" Unicode character block.
2182         * @since 1.7
2183         */
2184        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2185            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2186                             "ANCIENT GREEK NUMBERS",
2187                             "ANCIENTGREEKNUMBERS");
2188
2189        /**
2190         * Constant for the "Ancient Symbols" Unicode character block.
2191         * @since 1.7
2192         */
2193        public static final UnicodeBlock ANCIENT_SYMBOLS =
2194            new UnicodeBlock("ANCIENT_SYMBOLS",
2195                             "ANCIENT SYMBOLS",
2196                             "ANCIENTSYMBOLS");
2197
2198        /**
2199         * Constant for the "Phaistos Disc" Unicode character block.
2200         * @since 1.7
2201         */
2202        public static final UnicodeBlock PHAISTOS_DISC =
2203            new UnicodeBlock("PHAISTOS_DISC",
2204                             "PHAISTOS DISC",
2205                             "PHAISTOSDISC");
2206
2207        /**
2208         * Constant for the "Lycian" Unicode character block.
2209         * @since 1.7
2210         */
2211        public static final UnicodeBlock LYCIAN =
2212            new UnicodeBlock("LYCIAN");
2213
2214        /**
2215         * Constant for the "Carian" Unicode character block.
2216         * @since 1.7
2217         */
2218        public static final UnicodeBlock CARIAN =
2219            new UnicodeBlock("CARIAN");
2220
2221        /**
2222         * Constant for the "Old Persian" Unicode character block.
2223         * @since 1.7
2224         */
2225        public static final UnicodeBlock OLD_PERSIAN =
2226            new UnicodeBlock("OLD_PERSIAN",
2227                             "OLD PERSIAN",
2228                             "OLDPERSIAN");
2229
2230        /**
2231         * Constant for the "Imperial Aramaic" Unicode character block.
2232         * @since 1.7
2233         */
2234        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2235            new UnicodeBlock("IMPERIAL_ARAMAIC",
2236                             "IMPERIAL ARAMAIC",
2237                             "IMPERIALARAMAIC");
2238
2239        /**
2240         * Constant for the "Phoenician" Unicode character block.
2241         * @since 1.7
2242         */
2243        public static final UnicodeBlock PHOENICIAN =
2244            new UnicodeBlock("PHOENICIAN");
2245
2246        /**
2247         * Constant for the "Lydian" Unicode character block.
2248         * @since 1.7
2249         */
2250        public static final UnicodeBlock LYDIAN =
2251            new UnicodeBlock("LYDIAN");
2252
2253        /**
2254         * Constant for the "Kharoshthi" Unicode character block.
2255         * @since 1.7
2256         */
2257        public static final UnicodeBlock KHAROSHTHI =
2258            new UnicodeBlock("KHAROSHTHI");
2259
2260        /**
2261         * Constant for the "Old South Arabian" Unicode character block.
2262         * @since 1.7
2263         */
2264        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2265            new UnicodeBlock("OLD_SOUTH_ARABIAN",
2266                             "OLD SOUTH ARABIAN",
2267                             "OLDSOUTHARABIAN");
2268
2269        /**
2270         * Constant for the "Avestan" Unicode character block.
2271         * @since 1.7
2272         */
2273        public static final UnicodeBlock AVESTAN =
2274            new UnicodeBlock("AVESTAN");
2275
2276        /**
2277         * Constant for the "Inscriptional Parthian" Unicode character block.
2278         * @since 1.7
2279         */
2280        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2281            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2282                             "INSCRIPTIONAL PARTHIAN",
2283                             "INSCRIPTIONALPARTHIAN");
2284
2285        /**
2286         * Constant for the "Inscriptional Pahlavi" Unicode character block.
2287         * @since 1.7
2288         */
2289        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2290            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2291                             "INSCRIPTIONAL PAHLAVI",
2292                             "INSCRIPTIONALPAHLAVI");
2293
2294        /**
2295         * Constant for the "Old Turkic" Unicode character block.
2296         * @since 1.7
2297         */
2298        public static final UnicodeBlock OLD_TURKIC =
2299            new UnicodeBlock("OLD_TURKIC",
2300                             "OLD TURKIC",
2301                             "OLDTURKIC");
2302
2303        /**
2304         * Constant for the "Rumi Numeral Symbols" Unicode character block.
2305         * @since 1.7
2306         */
2307        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2308            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2309                             "RUMI NUMERAL SYMBOLS",
2310                             "RUMINUMERALSYMBOLS");
2311
2312        /**
2313         * Constant for the "Brahmi" Unicode character block.
2314         * @since 1.7
2315         */
2316        public static final UnicodeBlock BRAHMI =
2317            new UnicodeBlock("BRAHMI");
2318
2319        /**
2320         * Constant for the "Kaithi" Unicode character block.
2321         * @since 1.7
2322         */
2323        public static final UnicodeBlock KAITHI =
2324            new UnicodeBlock("KAITHI");
2325
2326        /**
2327         * Constant for the "Cuneiform" Unicode character block.
2328         * @since 1.7
2329         */
2330        public static final UnicodeBlock CUNEIFORM =
2331            new UnicodeBlock("CUNEIFORM");
2332
2333        /**
2334         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2335         * character block.
2336         * @since 1.7
2337         */
2338        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2339            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2340                             "CUNEIFORM NUMBERS AND PUNCTUATION",
2341                             "CUNEIFORMNUMBERSANDPUNCTUATION");
2342
2343        /**
2344         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2345         * @since 1.7
2346         */
2347        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2348            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2349                             "EGYPTIAN HIEROGLYPHS",
2350                             "EGYPTIANHIEROGLYPHS");
2351
2352        /**
2353         * Constant for the "Bamum Supplement" Unicode character block.
2354         * @since 1.7
2355         */
2356        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2357            new UnicodeBlock("BAMUM_SUPPLEMENT",
2358                             "BAMUM SUPPLEMENT",
2359                             "BAMUMSUPPLEMENT");
2360
2361        /**
2362         * Constant for the "Kana Supplement" Unicode character block.
2363         * @since 1.7
2364         */
2365        public static final UnicodeBlock KANA_SUPPLEMENT =
2366            new UnicodeBlock("KANA_SUPPLEMENT",
2367                             "KANA SUPPLEMENT",
2368                             "KANASUPPLEMENT");
2369
2370        /**
2371         * Constant for the "Ancient Greek Musical Notation" Unicode character
2372         * block.
2373         * @since 1.7
2374         */
2375        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2376            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2377                             "ANCIENT GREEK MUSICAL NOTATION",
2378                             "ANCIENTGREEKMUSICALNOTATION");
2379
2380        /**
2381         * Constant for the "Counting Rod Numerals" Unicode character block.
2382         * @since 1.7
2383         */
2384        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2385            new UnicodeBlock("COUNTING_ROD_NUMERALS",
2386                             "COUNTING ROD NUMERALS",
2387                             "COUNTINGRODNUMERALS");
2388
2389        /**
2390         * Constant for the "Mahjong Tiles" Unicode character block.
2391         * @since 1.7
2392         */
2393        public static final UnicodeBlock MAHJONG_TILES =
2394            new UnicodeBlock("MAHJONG_TILES",
2395                             "MAHJONG TILES",
2396                             "MAHJONGTILES");
2397
2398        /**
2399         * Constant for the "Domino Tiles" Unicode character block.
2400         * @since 1.7
2401         */
2402        public static final UnicodeBlock DOMINO_TILES =
2403            new UnicodeBlock("DOMINO_TILES",
2404                             "DOMINO TILES",
2405                             "DOMINOTILES");
2406
2407        /**
2408         * Constant for the "Playing Cards" Unicode character block.
2409         * @since 1.7
2410         */
2411        public static final UnicodeBlock PLAYING_CARDS =
2412            new UnicodeBlock("PLAYING_CARDS",
2413                             "PLAYING CARDS",
2414                             "PLAYINGCARDS");
2415
2416        /**
2417         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2418         * block.
2419         * @since 1.7
2420         */
2421        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2422            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2423                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2424                             "ENCLOSEDALPHANUMERICSUPPLEMENT");
2425
2426        /**
2427         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2428         * block.
2429         * @since 1.7
2430         */
2431        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2432            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2433                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2434                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2435
2436        /**
2437         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2438         * character block.
2439         * @since 1.7
2440         */
2441        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2442            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2443                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2444                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2445
2446        /**
2447         * Constant for the "Emoticons" Unicode character block.
2448         * @since 1.7
2449         */
2450        public static final UnicodeBlock EMOTICONS =
2451            new UnicodeBlock("EMOTICONS");
2452
2453        /**
2454         * Constant for the "Transport And Map Symbols" Unicode character block.
2455         * @since 1.7
2456         */
2457        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2458            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2459                             "TRANSPORT AND MAP SYMBOLS",
2460                             "TRANSPORTANDMAPSYMBOLS");
2461
2462        /**
2463         * Constant for the "Alchemical Symbols" Unicode character block.
2464         * @since 1.7
2465         */
2466        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2467            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2468                             "ALCHEMICAL SYMBOLS",
2469                             "ALCHEMICALSYMBOLS");
2470
2471        /**
2472         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2473         * character block.
2474         * @since 1.7
2475         */
2476        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2477            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2478                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2479                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2480
2481        /**
2482         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2483         * character block.
2484         * @since 1.7
2485         */
2486        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2487            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2488                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2489                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2490
2491        /**
2492         * Constant for the "Arabic Extended-A" Unicode character block.
2493         * @since 1.8
2494         */
2495        public static final UnicodeBlock ARABIC_EXTENDED_A =
2496            new UnicodeBlock("ARABIC_EXTENDED_A",
2497                             "ARABIC EXTENDED-A",
2498                             "ARABICEXTENDED-A");
2499
2500        /**
2501         * Constant for the "Sundanese Supplement" Unicode character block.
2502         * @since 1.8
2503         */
2504        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2505            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2506                             "SUNDANESE SUPPLEMENT",
2507                             "SUNDANESESUPPLEMENT");
2508
2509        /**
2510         * Constant for the "Meetei Mayek Extensions" Unicode character block.
2511         * @since 1.8
2512         */
2513        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2514            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2515                             "MEETEI MAYEK EXTENSIONS",
2516                             "MEETEIMAYEKEXTENSIONS");
2517
2518        /**
2519         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2520         * @since 1.8
2521         */
2522        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2523            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2524                             "MEROITIC HIEROGLYPHS",
2525                             "MEROITICHIEROGLYPHS");
2526
2527        /**
2528         * Constant for the "Meroitic Cursive" Unicode character block.
2529         * @since 1.8
2530         */
2531        public static final UnicodeBlock MEROITIC_CURSIVE =
2532            new UnicodeBlock("MEROITIC_CURSIVE",
2533                             "MEROITIC CURSIVE",
2534                             "MEROITICCURSIVE");
2535
2536        /**
2537         * Constant for the "Sora Sompeng" Unicode character block.
2538         * @since 1.8
2539         */
2540        public static final UnicodeBlock SORA_SOMPENG =
2541            new UnicodeBlock("SORA_SOMPENG",
2542                             "SORA SOMPENG",
2543                             "SORASOMPENG");
2544
2545        /**
2546         * Constant for the "Chakma" Unicode character block.
2547         * @since 1.8
2548         */
2549        public static final UnicodeBlock CHAKMA =
2550            new UnicodeBlock("CHAKMA");
2551
2552        /**
2553         * Constant for the "Sharada" Unicode character block.
2554         * @since 1.8
2555         */
2556        public static final UnicodeBlock SHARADA =
2557            new UnicodeBlock("SHARADA");
2558
2559        /**
2560         * Constant for the "Takri" Unicode character block.
2561         * @since 1.8
2562         */
2563        public static final UnicodeBlock TAKRI =
2564            new UnicodeBlock("TAKRI");
2565
2566        /**
2567         * Constant for the "Miao" Unicode character block.
2568         * @since 1.8
2569         */
2570        public static final UnicodeBlock MIAO =
2571            new UnicodeBlock("MIAO");
2572
2573        /**
2574         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2575         * character block.
2576         * @since 1.8
2577         */
2578        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2579            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2580                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2581                             "ARABICMATHEMATICALALPHABETICSYMBOLS");
2582
2583        private static final int blockStarts[] = {
2584            0x0000,   // 0000..007F; Basic Latin
2585            0x0080,   // 0080..00FF; Latin-1 Supplement
2586            0x0100,   // 0100..017F; Latin Extended-A
2587            0x0180,   // 0180..024F; Latin Extended-B
2588            0x0250,   // 0250..02AF; IPA Extensions
2589            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2590            0x0300,   // 0300..036F; Combining Diacritical Marks
2591            0x0370,   // 0370..03FF; Greek and Coptic
2592            0x0400,   // 0400..04FF; Cyrillic
2593            0x0500,   // 0500..052F; Cyrillic Supplement
2594            0x0530,   // 0530..058F; Armenian
2595            0x0590,   // 0590..05FF; Hebrew
2596            0x0600,   // 0600..06FF; Arabic
2597            0x0700,   // 0700..074F; Syriac
2598            0x0750,   // 0750..077F; Arabic Supplement
2599            0x0780,   // 0780..07BF; Thaana
2600            0x07C0,   // 07C0..07FF; NKo
2601            0x0800,   // 0800..083F; Samaritan
2602            0x0840,   // 0840..085F; Mandaic
2603            0x0860,   //             unassigned
2604            0x08A0,   // 08A0..08FF; Arabic Extended-A
2605            0x0900,   // 0900..097F; Devanagari
2606            0x0980,   // 0980..09FF; Bengali
2607            0x0A00,   // 0A00..0A7F; Gurmukhi
2608            0x0A80,   // 0A80..0AFF; Gujarati
2609            0x0B00,   // 0B00..0B7F; Oriya
2610            0x0B80,   // 0B80..0BFF; Tamil
2611            0x0C00,   // 0C00..0C7F; Telugu
2612            0x0C80,   // 0C80..0CFF; Kannada
2613            0x0D00,   // 0D00..0D7F; Malayalam
2614            0x0D80,   // 0D80..0DFF; Sinhala
2615            0x0E00,   // 0E00..0E7F; Thai
2616            0x0E80,   // 0E80..0EFF; Lao
2617            0x0F00,   // 0F00..0FFF; Tibetan
2618            0x1000,   // 1000..109F; Myanmar
2619            0x10A0,   // 10A0..10FF; Georgian
2620            0x1100,   // 1100..11FF; Hangul Jamo
2621            0x1200,   // 1200..137F; Ethiopic
2622            0x1380,   // 1380..139F; Ethiopic Supplement
2623            0x13A0,   // 13A0..13FF; Cherokee
2624            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2625            0x1680,   // 1680..169F; Ogham
2626            0x16A0,   // 16A0..16FF; Runic
2627            0x1700,   // 1700..171F; Tagalog
2628            0x1720,   // 1720..173F; Hanunoo
2629            0x1740,   // 1740..175F; Buhid
2630            0x1760,   // 1760..177F; Tagbanwa
2631            0x1780,   // 1780..17FF; Khmer
2632            0x1800,   // 1800..18AF; Mongolian
2633            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2634            0x1900,   // 1900..194F; Limbu
2635            0x1950,   // 1950..197F; Tai Le
2636            0x1980,   // 1980..19DF; New Tai Lue
2637            0x19E0,   // 19E0..19FF; Khmer Symbols
2638            0x1A00,   // 1A00..1A1F; Buginese
2639            0x1A20,   // 1A20..1AAF; Tai Tham
2640            0x1AB0,   //             unassigned
2641            0x1B00,   // 1B00..1B7F; Balinese
2642            0x1B80,   // 1B80..1BBF; Sundanese
2643            0x1BC0,   // 1BC0..1BFF; Batak
2644            0x1C00,   // 1C00..1C4F; Lepcha
2645            0x1C50,   // 1C50..1C7F; Ol Chiki
2646            0x1C80,   //             unassigned
2647            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2648            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2649            0x1D00,   // 1D00..1D7F; Phonetic Extensions
2650            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2651            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2652            0x1E00,   // 1E00..1EFF; Latin Extended Additional
2653            0x1F00,   // 1F00..1FFF; Greek Extended
2654            0x2000,   // 2000..206F; General Punctuation
2655            0x2070,   // 2070..209F; Superscripts and Subscripts
2656            0x20A0,   // 20A0..20CF; Currency Symbols
2657            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2658            0x2100,   // 2100..214F; Letterlike Symbols
2659            0x2150,   // 2150..218F; Number Forms
2660            0x2190,   // 2190..21FF; Arrows
2661            0x2200,   // 2200..22FF; Mathematical Operators
2662            0x2300,   // 2300..23FF; Miscellaneous Technical
2663            0x2400,   // 2400..243F; Control Pictures
2664            0x2440,   // 2440..245F; Optical Character Recognition
2665            0x2460,   // 2460..24FF; Enclosed Alphanumerics
2666            0x2500,   // 2500..257F; Box Drawing
2667            0x2580,   // 2580..259F; Block Elements
2668            0x25A0,   // 25A0..25FF; Geometric Shapes
2669            0x2600,   // 2600..26FF; Miscellaneous Symbols
2670            0x2700,   // 2700..27BF; Dingbats
2671            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2672            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2673            0x2800,   // 2800..28FF; Braille Patterns
2674            0x2900,   // 2900..297F; Supplemental Arrows-B
2675            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2676            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2677            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2678            0x2C00,   // 2C00..2C5F; Glagolitic
2679            0x2C60,   // 2C60..2C7F; Latin Extended-C
2680            0x2C80,   // 2C80..2CFF; Coptic
2681            0x2D00,   // 2D00..2D2F; Georgian Supplement
2682            0x2D30,   // 2D30..2D7F; Tifinagh
2683            0x2D80,   // 2D80..2DDF; Ethiopic Extended
2684            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2685            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2686            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2687            0x2F00,   // 2F00..2FDF; Kangxi Radicals
2688            0x2FE0,   //             unassigned
2689            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2690            0x3000,   // 3000..303F; CJK Symbols and Punctuation
2691            0x3040,   // 3040..309F; Hiragana
2692            0x30A0,   // 30A0..30FF; Katakana
2693            0x3100,   // 3100..312F; Bopomofo
2694            0x3130,   // 3130..318F; Hangul Compatibility Jamo
2695            0x3190,   // 3190..319F; Kanbun
2696            0x31A0,   // 31A0..31BF; Bopomofo Extended
2697            0x31C0,   // 31C0..31EF; CJK Strokes
2698            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2699            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2700            0x3300,   // 3300..33FF; CJK Compatibility
2701            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2702            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2703            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2704            0xA000,   // A000..A48F; Yi Syllables
2705            0xA490,   // A490..A4CF; Yi Radicals
2706            0xA4D0,   // A4D0..A4FF; Lisu
2707            0xA500,   // A500..A63F; Vai
2708            0xA640,   // A640..A69F; Cyrillic Extended-B
2709            0xA6A0,   // A6A0..A6FF; Bamum
2710            0xA700,   // A700..A71F; Modifier Tone Letters
2711            0xA720,   // A720..A7FF; Latin Extended-D
2712            0xA800,   // A800..A82F; Syloti Nagri
2713            0xA830,   // A830..A83F; Common Indic Number Forms
2714            0xA840,   // A840..A87F; Phags-pa
2715            0xA880,   // A880..A8DF; Saurashtra
2716            0xA8E0,   // A8E0..A8FF; Devanagari Extended
2717            0xA900,   // A900..A92F; Kayah Li
2718            0xA930,   // A930..A95F; Rejang
2719            0xA960,   // A960..A97F; Hangul Jamo Extended-A
2720            0xA980,   // A980..A9DF; Javanese
2721            0xA9E0,   //             unassigned
2722            0xAA00,   // AA00..AA5F; Cham
2723            0xAA60,   // AA60..AA7F; Myanmar Extended-A
2724            0xAA80,   // AA80..AADF; Tai Viet
2725            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2726            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2727            0xAB30,   //             unassigned
2728            0xABC0,   // ABC0..ABFF; Meetei Mayek
2729            0xAC00,   // AC00..D7AF; Hangul Syllables
2730            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2731            0xD800,   // D800..DB7F; High Surrogates
2732            0xDB80,   // DB80..DBFF; High Private Use Surrogates
2733            0xDC00,   // DC00..DFFF; Low Surrogates
2734            0xE000,   // E000..F8FF; Private Use Area
2735            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2736            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2737            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2738            0xFE00,   // FE00..FE0F; Variation Selectors
2739            0xFE10,   // FE10..FE1F; Vertical Forms
2740            0xFE20,   // FE20..FE2F; Combining Half Marks
2741            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2742            0xFE50,   // FE50..FE6F; Small Form Variants
2743            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2744            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2745            0xFFF0,   // FFF0..FFFF; Specials
2746            0x10000,  // 10000..1007F; Linear B Syllabary
2747            0x10080,  // 10080..100FF; Linear B Ideograms
2748            0x10100,  // 10100..1013F; Aegean Numbers
2749            0x10140,  // 10140..1018F; Ancient Greek Numbers
2750            0x10190,  // 10190..101CF; Ancient Symbols
2751            0x101D0,  // 101D0..101FF; Phaistos Disc
2752            0x10200,  //               unassigned
2753            0x10280,  // 10280..1029F; Lycian
2754            0x102A0,  // 102A0..102DF; Carian
2755            0x102E0,  //               unassigned
2756            0x10300,  // 10300..1032F; Old Italic
2757            0x10330,  // 10330..1034F; Gothic
2758            0x10350,  //               unassigned
2759            0x10380,  // 10380..1039F; Ugaritic
2760            0x103A0,  // 103A0..103DF; Old Persian
2761            0x103E0,  //               unassigned
2762            0x10400,  // 10400..1044F; Deseret
2763            0x10450,  // 10450..1047F; Shavian
2764            0x10480,  // 10480..104AF; Osmanya
2765            0x104B0,  //               unassigned
2766            0x10800,  // 10800..1083F; Cypriot Syllabary
2767            0x10840,  // 10840..1085F; Imperial Aramaic
2768            0x10860,  //               unassigned
2769            0x10900,  // 10900..1091F; Phoenician
2770            0x10920,  // 10920..1093F; Lydian
2771            0x10940,  //               unassigned
2772            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2773            0x109A0,  // 109A0..109FF; Meroitic Cursive
2774            0x10A00,  // 10A00..10A5F; Kharoshthi
2775            0x10A60,  // 10A60..10A7F; Old South Arabian
2776            0x10A80,  //               unassigned
2777            0x10B00,  // 10B00..10B3F; Avestan
2778            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2779            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2780            0x10B80,  //               unassigned
2781            0x10C00,  // 10C00..10C4F; Old Turkic
2782            0x10C50,  //               unassigned
2783            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2784            0x10E80,  //               unassigned
2785            0x11000,  // 11000..1107F; Brahmi
2786            0x11080,  // 11080..110CF; Kaithi
2787            0x110D0,  // 110D0..110FF; Sora Sompeng
2788            0x11100,  // 11100..1114F; Chakma
2789            0x11150,  //               unassigned
2790            0x11180,  // 11180..111DF; Sharada
2791            0x111E0,  //               unassigned
2792            0x11680,  // 11680..116CF; Takri
2793            0x116D0,  //               unassigned
2794            0x12000,  // 12000..123FF; Cuneiform
2795            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2796            0x12480,  //               unassigned
2797            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2798            0x13430,  //               unassigned
2799            0x16800,  // 16800..16A3F; Bamum Supplement
2800            0x16A40,  //               unassigned
2801            0x16F00,  // 16F00..16F9F; Miao
2802            0x16FA0,  //               unassigned
2803            0x1B000,  // 1B000..1B0FF; Kana Supplement
2804            0x1B100,  //               unassigned
2805            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2806            0x1D100,  // 1D100..1D1FF; Musical Symbols
2807            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2808            0x1D250,  //               unassigned
2809            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2810            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2811            0x1D380,  //               unassigned
2812            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2813            0x1D800,  //               unassigned
2814            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2815            0x1EF00,  //               unassigned
2816            0x1F000,  // 1F000..1F02F; Mahjong Tiles
2817            0x1F030,  // 1F030..1F09F; Domino Tiles
2818            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2819            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2820            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2821            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2822            0x1F600,  // 1F600..1F64F; Emoticons
2823            0x1F650,  //               unassigned
2824            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2825            0x1F700,  // 1F700..1F77F; Alchemical Symbols
2826            0x1F780,  //               unassigned
2827            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2828            0x2A6E0,  //               unassigned
2829            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2830            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2831            0x2B820,  //               unassigned
2832            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2833            0x2FA20,  //               unassigned
2834            0xE0000,  // E0000..E007F; Tags
2835            0xE0080,  //               unassigned
2836            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2837            0xE01F0,  //               unassigned
2838            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2839            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2840        };
2841
2842        private static final UnicodeBlock[] blocks = {
2843            BASIC_LATIN,
2844            LATIN_1_SUPPLEMENT,
2845            LATIN_EXTENDED_A,
2846            LATIN_EXTENDED_B,
2847            IPA_EXTENSIONS,
2848            SPACING_MODIFIER_LETTERS,
2849            COMBINING_DIACRITICAL_MARKS,
2850            GREEK,
2851            CYRILLIC,
2852            CYRILLIC_SUPPLEMENTARY,
2853            ARMENIAN,
2854            HEBREW,
2855            ARABIC,
2856            SYRIAC,
2857            ARABIC_SUPPLEMENT,
2858            THAANA,
2859            NKO,
2860            SAMARITAN,
2861            MANDAIC,
2862            null,
2863            ARABIC_EXTENDED_A,
2864            DEVANAGARI,
2865            BENGALI,
2866            GURMUKHI,
2867            GUJARATI,
2868            ORIYA,
2869            TAMIL,
2870            TELUGU,
2871            KANNADA,
2872            MALAYALAM,
2873            SINHALA,
2874            THAI,
2875            LAO,
2876            TIBETAN,
2877            MYANMAR,
2878            GEORGIAN,
2879            HANGUL_JAMO,
2880            ETHIOPIC,
2881            ETHIOPIC_SUPPLEMENT,
2882            CHEROKEE,
2883            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2884            OGHAM,
2885            RUNIC,
2886            TAGALOG,
2887            HANUNOO,
2888            BUHID,
2889            TAGBANWA,
2890            KHMER,
2891            MONGOLIAN,
2892            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2893            LIMBU,
2894            TAI_LE,
2895            NEW_TAI_LUE,
2896            KHMER_SYMBOLS,
2897            BUGINESE,
2898            TAI_THAM,
2899            null,
2900            BALINESE,
2901            SUNDANESE,
2902            BATAK,
2903            LEPCHA,
2904            OL_CHIKI,
2905            null,
2906            SUNDANESE_SUPPLEMENT,
2907            VEDIC_EXTENSIONS,
2908            PHONETIC_EXTENSIONS,
2909            PHONETIC_EXTENSIONS_SUPPLEMENT,
2910            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2911            LATIN_EXTENDED_ADDITIONAL,
2912            GREEK_EXTENDED,
2913            GENERAL_PUNCTUATION,
2914            SUPERSCRIPTS_AND_SUBSCRIPTS,
2915            CURRENCY_SYMBOLS,
2916            COMBINING_MARKS_FOR_SYMBOLS,
2917            LETTERLIKE_SYMBOLS,
2918            NUMBER_FORMS,
2919            ARROWS,
2920            MATHEMATICAL_OPERATORS,
2921            MISCELLANEOUS_TECHNICAL,
2922            CONTROL_PICTURES,
2923            OPTICAL_CHARACTER_RECOGNITION,
2924            ENCLOSED_ALPHANUMERICS,
2925            BOX_DRAWING,
2926            BLOCK_ELEMENTS,
2927            GEOMETRIC_SHAPES,
2928            MISCELLANEOUS_SYMBOLS,
2929            DINGBATS,
2930            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2931            SUPPLEMENTAL_ARROWS_A,
2932            BRAILLE_PATTERNS,
2933            SUPPLEMENTAL_ARROWS_B,
2934            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2935            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2936            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2937            GLAGOLITIC,
2938            LATIN_EXTENDED_C,
2939            COPTIC,
2940            GEORGIAN_SUPPLEMENT,
2941            TIFINAGH,
2942            ETHIOPIC_EXTENDED,
2943            CYRILLIC_EXTENDED_A,
2944            SUPPLEMENTAL_PUNCTUATION,
2945            CJK_RADICALS_SUPPLEMENT,
2946            KANGXI_RADICALS,
2947            null,
2948            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2949            CJK_SYMBOLS_AND_PUNCTUATION,
2950            HIRAGANA,
2951            KATAKANA,
2952            BOPOMOFO,
2953            HANGUL_COMPATIBILITY_JAMO,
2954            KANBUN,
2955            BOPOMOFO_EXTENDED,
2956            CJK_STROKES,
2957            KATAKANA_PHONETIC_EXTENSIONS,
2958            ENCLOSED_CJK_LETTERS_AND_MONTHS,
2959            CJK_COMPATIBILITY,
2960            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2961            YIJING_HEXAGRAM_SYMBOLS,
2962            CJK_UNIFIED_IDEOGRAPHS,
2963            YI_SYLLABLES,
2964            YI_RADICALS,
2965            LISU,
2966            VAI,
2967            CYRILLIC_EXTENDED_B,
2968            BAMUM,
2969            MODIFIER_TONE_LETTERS,
2970            LATIN_EXTENDED_D,
2971            SYLOTI_NAGRI,
2972            COMMON_INDIC_NUMBER_FORMS,
2973            PHAGS_PA,
2974            SAURASHTRA,
2975            DEVANAGARI_EXTENDED,
2976            KAYAH_LI,
2977            REJANG,
2978            HANGUL_JAMO_EXTENDED_A,
2979            JAVANESE,
2980            null,
2981            CHAM,
2982            MYANMAR_EXTENDED_A,
2983            TAI_VIET,
2984            MEETEI_MAYEK_EXTENSIONS,
2985            ETHIOPIC_EXTENDED_A,
2986            null,
2987            MEETEI_MAYEK,
2988            HANGUL_SYLLABLES,
2989            HANGUL_JAMO_EXTENDED_B,
2990            HIGH_SURROGATES,
2991            HIGH_PRIVATE_USE_SURROGATES,
2992            LOW_SURROGATES,
2993            PRIVATE_USE_AREA,
2994            CJK_COMPATIBILITY_IDEOGRAPHS,
2995            ALPHABETIC_PRESENTATION_FORMS,
2996            ARABIC_PRESENTATION_FORMS_A,
2997            VARIATION_SELECTORS,
2998            VERTICAL_FORMS,
2999            COMBINING_HALF_MARKS,
3000            CJK_COMPATIBILITY_FORMS,
3001            SMALL_FORM_VARIANTS,
3002            ARABIC_PRESENTATION_FORMS_B,
3003            HALFWIDTH_AND_FULLWIDTH_FORMS,
3004            SPECIALS,
3005            LINEAR_B_SYLLABARY,
3006            LINEAR_B_IDEOGRAMS,
3007            AEGEAN_NUMBERS,
3008            ANCIENT_GREEK_NUMBERS,
3009            ANCIENT_SYMBOLS,
3010            PHAISTOS_DISC,
3011            null,
3012            LYCIAN,
3013            CARIAN,
3014            null,
3015            OLD_ITALIC,
3016            GOTHIC,
3017            null,
3018            UGARITIC,
3019            OLD_PERSIAN,
3020            null,
3021            DESERET,
3022            SHAVIAN,
3023            OSMANYA,
3024            null,
3025            CYPRIOT_SYLLABARY,
3026            IMPERIAL_ARAMAIC,
3027            null,
3028            PHOENICIAN,
3029            LYDIAN,
3030            null,
3031            MEROITIC_HIEROGLYPHS,
3032            MEROITIC_CURSIVE,
3033            KHAROSHTHI,
3034            OLD_SOUTH_ARABIAN,
3035            null,
3036            AVESTAN,
3037            INSCRIPTIONAL_PARTHIAN,
3038            INSCRIPTIONAL_PAHLAVI,
3039            null,
3040            OLD_TURKIC,
3041            null,
3042            RUMI_NUMERAL_SYMBOLS,
3043            null,
3044            BRAHMI,
3045            KAITHI,
3046            SORA_SOMPENG,
3047            CHAKMA,
3048            null,
3049            SHARADA,
3050            null,
3051            TAKRI,
3052            null,
3053            CUNEIFORM,
3054            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3055            null,
3056            EGYPTIAN_HIEROGLYPHS,
3057            null,
3058            BAMUM_SUPPLEMENT,
3059            null,
3060            MIAO,
3061            null,
3062            KANA_SUPPLEMENT,
3063            null,
3064            BYZANTINE_MUSICAL_SYMBOLS,
3065            MUSICAL_SYMBOLS,
3066            ANCIENT_GREEK_MUSICAL_NOTATION,
3067            null,
3068            TAI_XUAN_JING_SYMBOLS,
3069            COUNTING_ROD_NUMERALS,
3070            null,
3071            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3072            null,
3073            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3074            null,
3075            MAHJONG_TILES,
3076            DOMINO_TILES,
3077            PLAYING_CARDS,
3078            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3079            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3080            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3081            EMOTICONS,
3082            null,
3083            TRANSPORT_AND_MAP_SYMBOLS,
3084            ALCHEMICAL_SYMBOLS,
3085            null,
3086            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3087            null,
3088            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3089            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3090            null,
3091            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3092            null,
3093            TAGS,
3094            null,
3095            VARIATION_SELECTORS_SUPPLEMENT,
3096            null,
3097            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3098            SUPPLEMENTARY_PRIVATE_USE_AREA_B
3099        };
3100
3101
3102        /**
3103         * Returns the object representing the Unicode block containing the
3104         * given character, or {@code null} if the character is not a
3105         * member of a defined block.
3106         *
3107         * <p><b>Note:</b> This method cannot handle
3108         * <a href="Character.html#supplementary"> supplementary
3109         * characters</a>.  To support all Unicode characters, including
3110         * supplementary characters, use the {@link #of(int)} method.
3111         *
3112         * @param   c  The character in question
3113         * @return  The {@code UnicodeBlock} instance representing the
3114         *          Unicode block of which this character is a member, or
3115         *          {@code null} if the character is not a member of any
3116         *          Unicode block
3117         */
3118        public static UnicodeBlock of(char c) {
3119            return of((int)c);
3120        }
3121
3122        /**
3123         * Returns the object representing the Unicode block
3124         * containing the given character (Unicode code point), or
3125         * {@code null} if the character is not a member of a
3126         * defined block.
3127         *
3128         * @param   codePoint the character (Unicode code point) in question.
3129         * @return  The {@code UnicodeBlock} instance representing the
3130         *          Unicode block of which this character is a member, or
3131         *          {@code null} if the character is not a member of any
3132         *          Unicode block
3133         * @exception IllegalArgumentException if the specified
3134         * {@code codePoint} is an invalid Unicode code point.
3135         * @see Character#isValidCodePoint(int)
3136         * @since   1.5
3137         */
3138        public static UnicodeBlock of(int codePoint) {
3139            if (!isValidCodePoint(codePoint)) {
3140                throw new IllegalArgumentException();
3141            }
3142
3143            int top, bottom, current;
3144            bottom = 0;
3145            top = blockStarts.length;
3146            current = top/2;
3147
3148            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3149            while (top - bottom > 1) {
3150                if (codePoint >= blockStarts[current]) {
3151                    bottom = current;
3152                } else {
3153                    top = current;
3154                }
3155                current = (top + bottom) / 2;
3156            }
3157            return blocks[current];
3158        }
3159
3160        /**
3161         * Returns the UnicodeBlock with the given name. Block
3162         * names are determined by The Unicode Standard. The file
3163         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3164         * version of the standard. The {@link Character} class specifies
3165         * the version of the standard that it supports.
3166         * <p>
3167         * This method accepts block names in the following forms:
3168         * <ol>
3169         * <li> Canonical block names as defined by the Unicode Standard.
3170         * For example, the standard defines a "Basic Latin" block. Therefore, this
3171         * method accepts "Basic Latin" as a valid block name. The documentation of
3172         * each UnicodeBlock provides the canonical name.
3173         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3174         * is a valid block name for the "Basic Latin" block.
3175         * <li>The text representation of each constant UnicodeBlock identifier.
3176         * For example, this method will return the {@link #BASIC_LATIN} block if
3177         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3178         * hyphens in the canonical name with underscores.
3179         * </ol>
3180         * Finally, character case is ignored for all of the valid block name forms.
3181         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3182         * The en_US locale's case mapping rules are used to provide case-insensitive
3183         * string comparisons for block name validation.
3184         * <p>
3185         * If the Unicode Standard changes block names, both the previous and
3186         * current names will be accepted.
3187         *
3188         * @param blockName A {@code UnicodeBlock} name.
3189         * @return The {@code UnicodeBlock} instance identified
3190         *         by {@code blockName}
3191         * @throws IllegalArgumentException if {@code blockName} is an
3192         *         invalid name
3193         * @throws NullPointerException if {@code blockName} is null
3194         * @since 1.5
3195         */
3196        public static final UnicodeBlock forName(String blockName) {
3197            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3198            if (block == null) {
3199                throw new IllegalArgumentException();
3200            }
3201            return block;
3202        }
3203    }
3204
3205
3206    /**
3207     * A family of character subsets representing the character scripts
3208     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3209     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3210     * character is assigned to a single Unicode script, either a specific
3211     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3212     * one of the following three special values,
3213     * {@link Character.UnicodeScript#INHERITED Inherited},
3214     * {@link Character.UnicodeScript#COMMON Common} or
3215     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3216     *
3217     * @since 1.7
3218     */
3219    public static enum UnicodeScript {
3220        /**
3221         * Unicode script "Common".
3222         */
3223        COMMON,
3224
3225        /**
3226         * Unicode script "Latin".
3227         */
3228        LATIN,
3229
3230        /**
3231         * Unicode script "Greek".
3232         */
3233        GREEK,
3234
3235        /**
3236         * Unicode script "Cyrillic".
3237         */
3238        CYRILLIC,
3239
3240        /**
3241         * Unicode script "Armenian".
3242         */
3243        ARMENIAN,
3244
3245        /**
3246         * Unicode script "Hebrew".
3247         */
3248        HEBREW,
3249
3250        /**
3251         * Unicode script "Arabic".
3252         */
3253        ARABIC,
3254
3255        /**
3256         * Unicode script "Syriac".
3257         */
3258        SYRIAC,
3259
3260        /**
3261         * Unicode script "Thaana".
3262         */
3263        THAANA,
3264
3265        /**
3266         * Unicode script "Devanagari".
3267         */
3268        DEVANAGARI,
3269
3270        /**
3271         * Unicode script "Bengali".
3272         */
3273        BENGALI,
3274
3275        /**
3276         * Unicode script "Gurmukhi".
3277         */
3278        GURMUKHI,
3279
3280        /**
3281         * Unicode script "Gujarati".
3282         */
3283        GUJARATI,
3284
3285        /**
3286         * Unicode script "Oriya".
3287         */
3288        ORIYA,
3289
3290        /**
3291         * Unicode script "Tamil".
3292         */
3293        TAMIL,
3294
3295        /**
3296         * Unicode script "Telugu".
3297         */
3298        TELUGU,
3299
3300        /**
3301         * Unicode script "Kannada".
3302         */
3303        KANNADA,
3304
3305        /**
3306         * Unicode script "Malayalam".
3307         */
3308        MALAYALAM,
3309
3310        /**
3311         * Unicode script "Sinhala".
3312         */
3313        SINHALA,
3314
3315        /**
3316         * Unicode script "Thai".
3317         */
3318        THAI,
3319
3320        /**
3321         * Unicode script "Lao".
3322         */
3323        LAO,
3324
3325        /**
3326         * Unicode script "Tibetan".
3327         */
3328        TIBETAN,
3329
3330        /**
3331         * Unicode script "Myanmar".
3332         */
3333        MYANMAR,
3334
3335        /**
3336         * Unicode script "Georgian".
3337         */
3338        GEORGIAN,
3339
3340        /**
3341         * Unicode script "Hangul".
3342         */
3343        HANGUL,
3344
3345        /**
3346         * Unicode script "Ethiopic".
3347         */
3348        ETHIOPIC,
3349
3350        /**
3351         * Unicode script "Cherokee".
3352         */
3353        CHEROKEE,
3354
3355        /**
3356         * Unicode script "Canadian_Aboriginal".
3357         */
3358        CANADIAN_ABORIGINAL,
3359
3360        /**
3361         * Unicode script "Ogham".
3362         */
3363        OGHAM,
3364
3365        /**
3366         * Unicode script "Runic".
3367         */
3368        RUNIC,
3369
3370        /**
3371         * Unicode script "Khmer".
3372         */
3373        KHMER,
3374
3375        /**
3376         * Unicode script "Mongolian".
3377         */
3378        MONGOLIAN,
3379
3380        /**
3381         * Unicode script "Hiragana".
3382         */
3383        HIRAGANA,
3384
3385        /**
3386         * Unicode script "Katakana".
3387         */
3388        KATAKANA,
3389
3390        /**
3391         * Unicode script "Bopomofo".
3392         */
3393        BOPOMOFO,
3394
3395        /**
3396         * Unicode script "Han".
3397         */
3398        HAN,
3399
3400        /**
3401         * Unicode script "Yi".
3402         */
3403        YI,
3404
3405        /**
3406         * Unicode script "Old_Italic".
3407         */
3408        OLD_ITALIC,
3409
3410        /**
3411         * Unicode script "Gothic".
3412         */
3413        GOTHIC,
3414
3415        /**
3416         * Unicode script "Deseret".
3417         */
3418        DESERET,
3419
3420        /**
3421         * Unicode script "Inherited".
3422         */
3423        INHERITED,
3424
3425        /**
3426         * Unicode script "Tagalog".
3427         */
3428        TAGALOG,
3429
3430        /**
3431         * Unicode script "Hanunoo".
3432         */
3433        HANUNOO,
3434
3435        /**
3436         * Unicode script "Buhid".
3437         */
3438        BUHID,
3439
3440        /**
3441         * Unicode script "Tagbanwa".
3442         */
3443        TAGBANWA,
3444
3445        /**
3446         * Unicode script "Limbu".
3447         */
3448        LIMBU,
3449
3450        /**
3451         * Unicode script "Tai_Le".
3452         */
3453        TAI_LE,
3454
3455        /**
3456         * Unicode script "Linear_B".
3457         */
3458        LINEAR_B,
3459
3460        /**
3461         * Unicode script "Ugaritic".
3462         */
3463        UGARITIC,
3464
3465        /**
3466         * Unicode script "Shavian".
3467         */
3468        SHAVIAN,
3469
3470        /**
3471         * Unicode script "Osmanya".
3472         */
3473        OSMANYA,
3474
3475        /**
3476         * Unicode script "Cypriot".
3477         */
3478        CYPRIOT,
3479
3480        /**
3481         * Unicode script "Braille".
3482         */
3483        BRAILLE,
3484
3485        /**
3486         * Unicode script "Buginese".
3487         */
3488        BUGINESE,
3489
3490        /**
3491         * Unicode script "Coptic".
3492         */
3493        COPTIC,
3494
3495        /**
3496         * Unicode script "New_Tai_Lue".
3497         */
3498        NEW_TAI_LUE,
3499
3500        /**
3501         * Unicode script "Glagolitic".
3502         */
3503        GLAGOLITIC,
3504
3505        /**
3506         * Unicode script "Tifinagh".
3507         */
3508        TIFINAGH,
3509
3510        /**
3511         * Unicode script "Syloti_Nagri".
3512         */
3513        SYLOTI_NAGRI,
3514
3515        /**
3516         * Unicode script "Old_Persian".
3517         */
3518        OLD_PERSIAN,
3519
3520        /**
3521         * Unicode script "Kharoshthi".
3522         */
3523        KHAROSHTHI,
3524
3525        /**
3526         * Unicode script "Balinese".
3527         */
3528        BALINESE,
3529
3530        /**
3531         * Unicode script "Cuneiform".
3532         */
3533        CUNEIFORM,
3534
3535        /**
3536         * Unicode script "Phoenician".
3537         */
3538        PHOENICIAN,
3539
3540        /**
3541         * Unicode script "Phags_Pa".
3542         */
3543        PHAGS_PA,
3544
3545        /**
3546         * Unicode script "Nko".
3547         */
3548        NKO,
3549
3550        /**
3551         * Unicode script "Sundanese".
3552         */
3553        SUNDANESE,
3554
3555        /**
3556         * Unicode script "Batak".
3557         */
3558        BATAK,
3559
3560        /**
3561         * Unicode script "Lepcha".
3562         */
3563        LEPCHA,
3564
3565        /**
3566         * Unicode script "Ol_Chiki".
3567         */
3568        OL_CHIKI,
3569
3570        /**
3571         * Unicode script "Vai".
3572         */
3573        VAI,
3574
3575        /**
3576         * Unicode script "Saurashtra".
3577         */
3578        SAURASHTRA,
3579
3580        /**
3581         * Unicode script "Kayah_Li".
3582         */
3583        KAYAH_LI,
3584
3585        /**
3586         * Unicode script "Rejang".
3587         */
3588        REJANG,
3589
3590        /**
3591         * Unicode script "Lycian".
3592         */
3593        LYCIAN,
3594
3595        /**
3596         * Unicode script "Carian".
3597         */
3598        CARIAN,
3599
3600        /**
3601         * Unicode script "Lydian".
3602         */
3603        LYDIAN,
3604
3605        /**
3606         * Unicode script "Cham".
3607         */
3608        CHAM,
3609
3610        /**
3611         * Unicode script "Tai_Tham".
3612         */
3613        TAI_THAM,
3614
3615        /**
3616         * Unicode script "Tai_Viet".
3617         */
3618        TAI_VIET,
3619
3620        /**
3621         * Unicode script "Avestan".
3622         */
3623        AVESTAN,
3624
3625        /**
3626         * Unicode script "Egyptian_Hieroglyphs".
3627         */
3628        EGYPTIAN_HIEROGLYPHS,
3629
3630        /**
3631         * Unicode script "Samaritan".
3632         */
3633        SAMARITAN,
3634
3635        /**
3636         * Unicode script "Mandaic".
3637         */
3638        MANDAIC,
3639
3640        /**
3641         * Unicode script "Lisu".
3642         */
3643        LISU,
3644
3645        /**
3646         * Unicode script "Bamum".
3647         */
3648        BAMUM,
3649
3650        /**
3651         * Unicode script "Javanese".
3652         */
3653        JAVANESE,
3654
3655        /**
3656         * Unicode script "Meetei_Mayek".
3657         */
3658        MEETEI_MAYEK,
3659
3660        /**
3661         * Unicode script "Imperial_Aramaic".
3662         */
3663        IMPERIAL_ARAMAIC,
3664
3665        /**
3666         * Unicode script "Old_South_Arabian".
3667         */
3668        OLD_SOUTH_ARABIAN,
3669
3670        /**
3671         * Unicode script "Inscriptional_Parthian".
3672         */
3673        INSCRIPTIONAL_PARTHIAN,
3674
3675        /**
3676         * Unicode script "Inscriptional_Pahlavi".
3677         */
3678        INSCRIPTIONAL_PAHLAVI,
3679
3680        /**
3681         * Unicode script "Old_Turkic".
3682         */
3683        OLD_TURKIC,
3684
3685        /**
3686         * Unicode script "Brahmi".
3687         */
3688        BRAHMI,
3689
3690        /**
3691         * Unicode script "Kaithi".
3692         */
3693        KAITHI,
3694
3695        /**
3696         * Unicode script "Meroitic Hieroglyphs".
3697         */
3698        MEROITIC_HIEROGLYPHS,
3699
3700        /**
3701         * Unicode script "Meroitic Cursive".
3702         */
3703        MEROITIC_CURSIVE,
3704
3705        /**
3706         * Unicode script "Sora Sompeng".
3707         */
3708        SORA_SOMPENG,
3709
3710        /**
3711         * Unicode script "Chakma".
3712         */
3713        CHAKMA,
3714
3715        /**
3716         * Unicode script "Sharada".
3717         */
3718        SHARADA,
3719
3720        /**
3721         * Unicode script "Takri".
3722         */
3723        TAKRI,
3724
3725        /**
3726         * Unicode script "Miao".
3727         */
3728        MIAO,
3729
3730        /**
3731         * Unicode script "Unknown".
3732         */
3733        UNKNOWN;
3734
3735        private static final int[] scriptStarts = {
3736            0x0000,   // 0000..0040; COMMON
3737            0x0041,   // 0041..005A; LATIN
3738            0x005B,   // 005B..0060; COMMON
3739            0x0061,   // 0061..007A; LATIN
3740            0x007B,   // 007B..00A9; COMMON
3741            0x00AA,   // 00AA..00AA; LATIN
3742            0x00AB,   // 00AB..00B9; COMMON
3743            0x00BA,   // 00BA..00BA; LATIN
3744            0x00BB,   // 00BB..00BF; COMMON
3745            0x00C0,   // 00C0..00D6; LATIN
3746            0x00D7,   // 00D7..00D7; COMMON
3747            0x00D8,   // 00D8..00F6; LATIN
3748            0x00F7,   // 00F7..00F7; COMMON
3749            0x00F8,   // 00F8..02B8; LATIN
3750            0x02B9,   // 02B9..02DF; COMMON
3751            0x02E0,   // 02E0..02E4; LATIN
3752            0x02E5,   // 02E5..02E9; COMMON
3753            0x02EA,   // 02EA..02EB; BOPOMOFO
3754            0x02EC,   // 02EC..02FF; COMMON
3755            0x0300,   // 0300..036F; INHERITED
3756            0x0370,   // 0370..0373; GREEK
3757            0x0374,   // 0374..0374; COMMON
3758            0x0375,   // 0375..037D; GREEK
3759            0x037E,   // 037E..0383; COMMON
3760            0x0384,   // 0384..0384; GREEK
3761            0x0385,   // 0385..0385; COMMON
3762            0x0386,   // 0386..0386; GREEK
3763            0x0387,   // 0387..0387; COMMON
3764            0x0388,   // 0388..03E1; GREEK
3765            0x03E2,   // 03E2..03EF; COPTIC
3766            0x03F0,   // 03F0..03FF; GREEK
3767            0x0400,   // 0400..0484; CYRILLIC
3768            0x0485,   // 0485..0486; INHERITED
3769            0x0487,   // 0487..0530; CYRILLIC
3770            0x0531,   // 0531..0588; ARMENIAN
3771            0x0589,   // 0589..0589; COMMON
3772            0x058A,   // 058A..0590; ARMENIAN
3773            0x0591,   // 0591..05FF; HEBREW
3774            0x0600,   // 0600..060B; ARABIC
3775            0x060C,   // 060C..060C; COMMON
3776            0x060D,   // 060D..061A; ARABIC
3777            0x061B,   // 061B..061D; COMMON
3778            0x061E,   // 061E..061E; ARABIC
3779            0x061F,   // 061F..061F; COMMON
3780            0x0620,   // 0620..063F; ARABIC
3781            0x0640,   // 0640..0640; COMMON
3782            0x0641,   // 0641..064A; ARABIC
3783            0x064B,   // 064B..0655; INHERITED
3784            0x0656,   // 0656..065F; ARABIC
3785            0x0660,   // 0660..0669; COMMON
3786            0x066A,   // 066A..066F; ARABIC
3787            0x0670,   // 0670..0670; INHERITED
3788            0x0671,   // 0671..06DC; ARABIC
3789            0x06DD,   // 06DD..06DD; COMMON
3790            0x06DE,   // 06DE..06FF; ARABIC
3791            0x0700,   // 0700..074F; SYRIAC
3792            0x0750,   // 0750..077F; ARABIC
3793            0x0780,   // 0780..07BF; THAANA
3794            0x07C0,   // 07C0..07FF; NKO
3795            0x0800,   // 0800..083F; SAMARITAN
3796            0x0840,   // 0840..089F; MANDAIC
3797            0x08A0,   // 08A0..08FF; ARABIC
3798            0x0900,   // 0900..0950; DEVANAGARI
3799            0x0951,   // 0951..0952; INHERITED
3800            0x0953,   // 0953..0963; DEVANAGARI
3801            0x0964,   // 0964..0965; COMMON
3802            0x0966,   // 0966..0980; DEVANAGARI
3803            0x0981,   // 0981..0A00; BENGALI
3804            0x0A01,   // 0A01..0A80; GURMUKHI
3805            0x0A81,   // 0A81..0B00; GUJARATI
3806            0x0B01,   // 0B01..0B81; ORIYA
3807            0x0B82,   // 0B82..0C00; TAMIL
3808            0x0C01,   // 0C01..0C81; TELUGU
3809            0x0C82,   // 0C82..0CF0; KANNADA
3810            0x0D02,   // 0D02..0D81; MALAYALAM
3811            0x0D82,   // 0D82..0E00; SINHALA
3812            0x0E01,   // 0E01..0E3E; THAI
3813            0x0E3F,   // 0E3F..0E3F; COMMON
3814            0x0E40,   // 0E40..0E80; THAI
3815            0x0E81,   // 0E81..0EFF; LAO
3816            0x0F00,   // 0F00..0FD4; TIBETAN
3817            0x0FD5,   // 0FD5..0FD8; COMMON
3818            0x0FD9,   // 0FD9..0FFF; TIBETAN
3819            0x1000,   // 1000..109F; MYANMAR
3820            0x10A0,   // 10A0..10FA; GEORGIAN
3821            0x10FB,   // 10FB..10FB; COMMON
3822            0x10FC,   // 10FC..10FF; GEORGIAN
3823            0x1100,   // 1100..11FF; HANGUL
3824            0x1200,   // 1200..139F; ETHIOPIC
3825            0x13A0,   // 13A0..13FF; CHEROKEE
3826            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3827            0x1680,   // 1680..169F; OGHAM
3828            0x16A0,   // 16A0..16EA; RUNIC
3829            0x16EB,   // 16EB..16ED; COMMON
3830            0x16EE,   // 16EE..16FF; RUNIC
3831            0x1700,   // 1700..171F; TAGALOG
3832            0x1720,   // 1720..1734; HANUNOO
3833            0x1735,   // 1735..173F; COMMON
3834            0x1740,   // 1740..175F; BUHID
3835            0x1760,   // 1760..177F; TAGBANWA
3836            0x1780,   // 1780..17FF; KHMER
3837            0x1800,   // 1800..1801; MONGOLIAN
3838            0x1802,   // 1802..1803; COMMON
3839            0x1804,   // 1804..1804; MONGOLIAN
3840            0x1805,   // 1805..1805; COMMON
3841            0x1806,   // 1806..18AF; MONGOLIAN
3842            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3843            0x1900,   // 1900..194F; LIMBU
3844            0x1950,   // 1950..197F; TAI_LE
3845            0x1980,   // 1980..19DF; NEW_TAI_LUE
3846            0x19E0,   // 19E0..19FF; KHMER
3847            0x1A00,   // 1A00..1A1F; BUGINESE
3848            0x1A20,   // 1A20..1AFF; TAI_THAM
3849            0x1B00,   // 1B00..1B7F; BALINESE
3850            0x1B80,   // 1B80..1BBF; SUNDANESE
3851            0x1BC0,   // 1BC0..1BFF; BATAK
3852            0x1C00,   // 1C00..1C4F; LEPCHA
3853            0x1C50,   // 1C50..1CBF; OL_CHIKI
3854            0x1CC0,   // 1CC0..1CCF; SUNDANESE
3855            0x1CD0,   // 1CD0..1CD2; INHERITED
3856            0x1CD3,   // 1CD3..1CD3; COMMON
3857            0x1CD4,   // 1CD4..1CE0; INHERITED
3858            0x1CE1,   // 1CE1..1CE1; COMMON
3859            0x1CE2,   // 1CE2..1CE8; INHERITED
3860            0x1CE9,   // 1CE9..1CEC; COMMON
3861            0x1CED,   // 1CED..1CED; INHERITED
3862            0x1CEE,   // 1CEE..1CF3; COMMON
3863            0x1CF4,   // 1CF4..1CF4; INHERITED
3864            0x1CF5,   // 1CF5..1CFF; COMMON
3865            0x1D00,   // 1D00..1D25; LATIN
3866            0x1D26,   // 1D26..1D2A; GREEK
3867            0x1D2B,   // 1D2B..1D2B; CYRILLIC
3868            0x1D2C,   // 1D2C..1D5C; LATIN
3869            0x1D5D,   // 1D5D..1D61; GREEK
3870            0x1D62,   // 1D62..1D65; LATIN
3871            0x1D66,   // 1D66..1D6A; GREEK
3872            0x1D6B,   // 1D6B..1D77; LATIN
3873            0x1D78,   // 1D78..1D78; CYRILLIC
3874            0x1D79,   // 1D79..1DBE; LATIN
3875            0x1DBF,   // 1DBF..1DBF; GREEK
3876            0x1DC0,   // 1DC0..1DFF; INHERITED
3877            0x1E00,   // 1E00..1EFF; LATIN
3878            0x1F00,   // 1F00..1FFF; GREEK
3879            0x2000,   // 2000..200B; COMMON
3880            0x200C,   // 200C..200D; INHERITED
3881            0x200E,   // 200E..2070; COMMON
3882            0x2071,   // 2071..2073; LATIN
3883            0x2074,   // 2074..207E; COMMON
3884            0x207F,   // 207F..207F; LATIN
3885            0x2080,   // 2080..208F; COMMON
3886            0x2090,   // 2090..209F; LATIN
3887            0x20A0,   // 20A0..20CF; COMMON
3888            0x20D0,   // 20D0..20FF; INHERITED
3889            0x2100,   // 2100..2125; COMMON
3890            0x2126,   // 2126..2126; GREEK
3891            0x2127,   // 2127..2129; COMMON
3892            0x212A,   // 212A..212B; LATIN
3893            0x212C,   // 212C..2131; COMMON
3894            0x2132,   // 2132..2132; LATIN
3895            0x2133,   // 2133..214D; COMMON
3896            0x214E,   // 214E..214E; LATIN
3897            0x214F,   // 214F..215F; COMMON
3898            0x2160,   // 2160..2188; LATIN
3899            0x2189,   // 2189..27FF; COMMON
3900            0x2800,   // 2800..28FF; BRAILLE
3901            0x2900,   // 2900..2BFF; COMMON
3902            0x2C00,   // 2C00..2C5F; GLAGOLITIC
3903            0x2C60,   // 2C60..2C7F; LATIN
3904            0x2C80,   // 2C80..2CFF; COPTIC
3905            0x2D00,   // 2D00..2D2F; GEORGIAN
3906            0x2D30,   // 2D30..2D7F; TIFINAGH
3907            0x2D80,   // 2D80..2DDF; ETHIOPIC
3908            0x2DE0,   // 2DE0..2DFF; CYRILLIC
3909            0x2E00,   // 2E00..2E7F; COMMON
3910            0x2E80,   // 2E80..2FEF; HAN
3911            0x2FF0,   // 2FF0..3004; COMMON
3912            0x3005,   // 3005..3005; HAN
3913            0x3006,   // 3006..3006; COMMON
3914            0x3007,   // 3007..3007; HAN
3915            0x3008,   // 3008..3020; COMMON
3916            0x3021,   // 3021..3029; HAN
3917            0x302A,   // 302A..302D; INHERITED
3918            0x302E,   // 302E..302F; HANGUL
3919            0x3030,   // 3030..3037; COMMON
3920            0x3038,   // 3038..303B; HAN
3921            0x303C,   // 303C..3040; COMMON
3922            0x3041,   // 3041..3098; HIRAGANA
3923            0x3099,   // 3099..309A; INHERITED
3924            0x309B,   // 309B..309C; COMMON
3925            0x309D,   // 309D..309F; HIRAGANA
3926            0x30A0,   // 30A0..30A0; COMMON
3927            0x30A1,   // 30A1..30FA; KATAKANA
3928            0x30FB,   // 30FB..30FC; COMMON
3929            0x30FD,   // 30FD..3104; KATAKANA
3930            0x3105,   // 3105..3130; BOPOMOFO
3931            0x3131,   // 3131..318F; HANGUL
3932            0x3190,   // 3190..319F; COMMON
3933            0x31A0,   // 31A0..31BF; BOPOMOFO
3934            0x31C0,   // 31C0..31EF; COMMON
3935            0x31F0,   // 31F0..31FF; KATAKANA
3936            0x3200,   // 3200..321F; HANGUL
3937            0x3220,   // 3220..325F; COMMON
3938            0x3260,   // 3260..327E; HANGUL
3939            0x327F,   // 327F..32CF; COMMON
3940            0x32D0,   // 32D0..3357; KATAKANA
3941            0x3358,   // 3358..33FF; COMMON
3942            0x3400,   // 3400..4DBF; HAN
3943            0x4DC0,   // 4DC0..4DFF; COMMON
3944            0x4E00,   // 4E00..9FFF; HAN
3945            0xA000,   // A000..A4CF; YI
3946            0xA4D0,   // A4D0..A4FF; LISU
3947            0xA500,   // A500..A63F; VAI
3948            0xA640,   // A640..A69F; CYRILLIC
3949            0xA6A0,   // A6A0..A6FF; BAMUM
3950            0xA700,   // A700..A721; COMMON
3951            0xA722,   // A722..A787; LATIN
3952            0xA788,   // A788..A78A; COMMON
3953            0xA78B,   // A78B..A7FF; LATIN
3954            0xA800,   // A800..A82F; SYLOTI_NAGRI
3955            0xA830,   // A830..A83F; COMMON
3956            0xA840,   // A840..A87F; PHAGS_PA
3957            0xA880,   // A880..A8DF; SAURASHTRA
3958            0xA8E0,   // A8E0..A8FF; DEVANAGARI
3959            0xA900,   // A900..A92F; KAYAH_LI
3960            0xA930,   // A930..A95F; REJANG
3961            0xA960,   // A960..A97F; HANGUL
3962            0xA980,   // A980..A9FF; JAVANESE
3963            0xAA00,   // AA00..AA5F; CHAM
3964            0xAA60,   // AA60..AA7F; MYANMAR
3965            0xAA80,   // AA80..AADF; TAI_VIET
3966            0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3967            0xAB01,   // AB01..ABBF; ETHIOPIC
3968            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3969            0xAC00,   // AC00..D7FB; HANGUL
3970            0xD7FC,   // D7FC..F8FF; UNKNOWN
3971            0xF900,   // F900..FAFF; HAN
3972            0xFB00,   // FB00..FB12; LATIN
3973            0xFB13,   // FB13..FB1C; ARMENIAN
3974            0xFB1D,   // FB1D..FB4F; HEBREW
3975            0xFB50,   // FB50..FD3D; ARABIC
3976            0xFD3E,   // FD3E..FD4F; COMMON
3977            0xFD50,   // FD50..FDFC; ARABIC
3978            0xFDFD,   // FDFD..FDFF; COMMON
3979            0xFE00,   // FE00..FE0F; INHERITED
3980            0xFE10,   // FE10..FE1F; COMMON
3981            0xFE20,   // FE20..FE2F; INHERITED
3982            0xFE30,   // FE30..FE6F; COMMON
3983            0xFE70,   // FE70..FEFE; ARABIC
3984            0xFEFF,   // FEFF..FF20; COMMON
3985            0xFF21,   // FF21..FF3A; LATIN
3986            0xFF3B,   // FF3B..FF40; COMMON
3987            0xFF41,   // FF41..FF5A; LATIN
3988            0xFF5B,   // FF5B..FF65; COMMON
3989            0xFF66,   // FF66..FF6F; KATAKANA
3990            0xFF70,   // FF70..FF70; COMMON
3991            0xFF71,   // FF71..FF9D; KATAKANA
3992            0xFF9E,   // FF9E..FF9F; COMMON
3993            0xFFA0,   // FFA0..FFDF; HANGUL
3994            0xFFE0,   // FFE0..FFFF; COMMON
3995            0x10000,  // 10000..100FF; LINEAR_B
3996            0x10100,  // 10100..1013F; COMMON
3997            0x10140,  // 10140..1018F; GREEK
3998            0x10190,  // 10190..101FC; COMMON
3999            0x101FD,  // 101FD..1027F; INHERITED
4000            0x10280,  // 10280..1029F; LYCIAN
4001            0x102A0,  // 102A0..102FF; CARIAN
4002            0x10300,  // 10300..1032F; OLD_ITALIC
4003            0x10330,  // 10330..1037F; GOTHIC
4004            0x10380,  // 10380..1039F; UGARITIC
4005            0x103A0,  // 103A0..103FF; OLD_PERSIAN
4006            0x10400,  // 10400..1044F; DESERET
4007            0x10450,  // 10450..1047F; SHAVIAN
4008            0x10480,  // 10480..107FF; OSMANYA
4009            0x10800,  // 10800..1083F; CYPRIOT
4010            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4011            0x10900,  // 10900..1091F; PHOENICIAN
4012            0x10920,  // 10920..1097F; LYDIAN
4013            0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4014            0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4015            0x10A00,  // 10A00..10A5F; KHAROSHTHI
4016            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4017            0x10B00,  // 10B00..10B3F; AVESTAN
4018            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4019            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4020            0x10C00,  // 10C00..10E5F; OLD_TURKIC
4021            0x10E60,  // 10E60..10FFF; ARABIC
4022            0x11000,  // 11000..1107F; BRAHMI
4023            0x11080,  // 11080..110CF; KAITHI
4024            0x110D0,  // 110D0..110FF; SORA_SOMPENG
4025            0x11100,  // 11100..1117F; CHAKMA
4026            0x11180,  // 11180..1167F; SHARADA
4027            0x11680,  // 11680..116CF; TAKRI
4028            0x12000,  // 12000..12FFF; CUNEIFORM
4029            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4030            0x16800,  // 16800..16A38; BAMUM
4031            0x16F00,  // 16F00..16F9F; MIAO
4032            0x1B000,  // 1B000..1B000; KATAKANA
4033            0x1B001,  // 1B001..1CFFF; HIRAGANA
4034            0x1D000,  // 1D000..1D166; COMMON
4035            0x1D167,  // 1D167..1D169; INHERITED
4036            0x1D16A,  // 1D16A..1D17A; COMMON
4037            0x1D17B,  // 1D17B..1D182; INHERITED
4038            0x1D183,  // 1D183..1D184; COMMON
4039            0x1D185,  // 1D185..1D18B; INHERITED
4040            0x1D18C,  // 1D18C..1D1A9; COMMON
4041            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4042            0x1D1AE,  // 1D1AE..1D1FF; COMMON
4043            0x1D200,  // 1D200..1D2FF; GREEK
4044            0x1D300,  // 1D300..1EDFF; COMMON
4045            0x1EE00,  // 1EE00..1EFFF; ARABIC
4046            0x1F000,  // 1F000..1F1FF; COMMON
4047            0x1F200,  // 1F200..1F200; HIRAGANA
4048            0x1F201,  // 1F210..1FFFF; COMMON
4049            0x20000,  // 20000..E0000; HAN
4050            0xE0001,  // E0001..E00FF; COMMON
4051            0xE0100,  // E0100..E01EF; INHERITED
4052            0xE01F0   // E01F0..10FFFF; UNKNOWN
4053
4054        };
4055
4056        private static final UnicodeScript[] scripts = {
4057            COMMON,
4058            LATIN,
4059            COMMON,
4060            LATIN,
4061            COMMON,
4062            LATIN,
4063            COMMON,
4064            LATIN,
4065            COMMON,
4066            LATIN,
4067            COMMON,
4068            LATIN,
4069            COMMON,
4070            LATIN,
4071            COMMON,
4072            LATIN,
4073            COMMON,
4074            BOPOMOFO,
4075            COMMON,
4076            INHERITED,
4077            GREEK,
4078            COMMON,
4079            GREEK,
4080            COMMON,
4081            GREEK,
4082            COMMON,
4083            GREEK,
4084            COMMON,
4085            GREEK,
4086            COPTIC,
4087            GREEK,
4088            CYRILLIC,
4089            INHERITED,
4090            CYRILLIC,
4091            ARMENIAN,
4092            COMMON,
4093            ARMENIAN,
4094            HEBREW,
4095            ARABIC,
4096            COMMON,
4097            ARABIC,
4098            COMMON,
4099            ARABIC,
4100            COMMON,
4101            ARABIC,
4102            COMMON,
4103            ARABIC,
4104            INHERITED,
4105            ARABIC,
4106            COMMON,
4107            ARABIC,
4108            INHERITED,
4109            ARABIC,
4110            COMMON,
4111            ARABIC,
4112            SYRIAC,
4113            ARABIC,
4114            THAANA,
4115            NKO,
4116            SAMARITAN,
4117            MANDAIC,
4118            ARABIC,
4119            DEVANAGARI,
4120            INHERITED,
4121            DEVANAGARI,
4122            COMMON,
4123            DEVANAGARI,
4124            BENGALI,
4125            GURMUKHI,
4126            GUJARATI,
4127            ORIYA,
4128            TAMIL,
4129            TELUGU,
4130            KANNADA,
4131            MALAYALAM,
4132            SINHALA,
4133            THAI,
4134            COMMON,
4135            THAI,
4136            LAO,
4137            TIBETAN,
4138            COMMON,
4139            TIBETAN,
4140            MYANMAR,
4141            GEORGIAN,
4142            COMMON,
4143            GEORGIAN,
4144            HANGUL,
4145            ETHIOPIC,
4146            CHEROKEE,
4147            CANADIAN_ABORIGINAL,
4148            OGHAM,
4149            RUNIC,
4150            COMMON,
4151            RUNIC,
4152            TAGALOG,
4153            HANUNOO,
4154            COMMON,
4155            BUHID,
4156            TAGBANWA,
4157            KHMER,
4158            MONGOLIAN,
4159            COMMON,
4160            MONGOLIAN,
4161            COMMON,
4162            MONGOLIAN,
4163            CANADIAN_ABORIGINAL,
4164            LIMBU,
4165            TAI_LE,
4166            NEW_TAI_LUE,
4167            KHMER,
4168            BUGINESE,
4169            TAI_THAM,
4170            BALINESE,
4171            SUNDANESE,
4172            BATAK,
4173            LEPCHA,
4174            OL_CHIKI,
4175            SUNDANESE,
4176            INHERITED,
4177            COMMON,
4178            INHERITED,
4179            COMMON,
4180            INHERITED,
4181            COMMON,
4182            INHERITED,
4183            COMMON,
4184            INHERITED,
4185            COMMON,
4186            LATIN,
4187            GREEK,
4188            CYRILLIC,
4189            LATIN,
4190            GREEK,
4191            LATIN,
4192            GREEK,
4193            LATIN,
4194            CYRILLIC,
4195            LATIN,
4196            GREEK,
4197            INHERITED,
4198            LATIN,
4199            GREEK,
4200            COMMON,
4201            INHERITED,
4202            COMMON,
4203            LATIN,
4204            COMMON,
4205            LATIN,
4206            COMMON,
4207            LATIN,
4208            COMMON,
4209            INHERITED,
4210            COMMON,
4211            GREEK,
4212            COMMON,
4213            LATIN,
4214            COMMON,
4215            LATIN,
4216            COMMON,
4217            LATIN,
4218            COMMON,
4219            LATIN,
4220            COMMON,
4221            BRAILLE,
4222            COMMON,
4223            GLAGOLITIC,
4224            LATIN,
4225            COPTIC,
4226            GEORGIAN,
4227            TIFINAGH,
4228            ETHIOPIC,
4229            CYRILLIC,
4230            COMMON,
4231            HAN,
4232            COMMON,
4233            HAN,
4234            COMMON,
4235            HAN,
4236            COMMON,
4237            HAN,
4238            INHERITED,
4239            HANGUL,
4240            COMMON,
4241            HAN,
4242            COMMON,
4243            HIRAGANA,
4244            INHERITED,
4245            COMMON,
4246            HIRAGANA,
4247            COMMON,
4248            KATAKANA,
4249            COMMON,
4250            KATAKANA,
4251            BOPOMOFO,
4252            HANGUL,
4253            COMMON,
4254            BOPOMOFO,
4255            COMMON,
4256            KATAKANA,
4257            HANGUL,
4258            COMMON,
4259            HANGUL,
4260            COMMON,
4261            KATAKANA,
4262            COMMON,
4263            HAN,
4264            COMMON,
4265            HAN,
4266            YI,
4267            LISU,
4268            VAI,
4269            CYRILLIC,
4270            BAMUM,
4271            COMMON,
4272            LATIN,
4273            COMMON,
4274            LATIN,
4275            SYLOTI_NAGRI,
4276            COMMON,
4277            PHAGS_PA,
4278            SAURASHTRA,
4279            DEVANAGARI,
4280            KAYAH_LI,
4281            REJANG,
4282            HANGUL,
4283            JAVANESE,
4284            CHAM,
4285            MYANMAR,
4286            TAI_VIET,
4287            MEETEI_MAYEK,
4288            ETHIOPIC,
4289            MEETEI_MAYEK,
4290            HANGUL,
4291            UNKNOWN     ,
4292            HAN,
4293            LATIN,
4294            ARMENIAN,
4295            HEBREW,
4296            ARABIC,
4297            COMMON,
4298            ARABIC,
4299            COMMON,
4300            INHERITED,
4301            COMMON,
4302            INHERITED,
4303            COMMON,
4304            ARABIC,
4305            COMMON,
4306            LATIN,
4307            COMMON,
4308            LATIN,
4309            COMMON,
4310            KATAKANA,
4311            COMMON,
4312            KATAKANA,
4313            COMMON,
4314            HANGUL,
4315            COMMON,
4316            LINEAR_B,
4317            COMMON,
4318            GREEK,
4319            COMMON,
4320            INHERITED,
4321            LYCIAN,
4322            CARIAN,
4323            OLD_ITALIC,
4324            GOTHIC,
4325            UGARITIC,
4326            OLD_PERSIAN,
4327            DESERET,
4328            SHAVIAN,
4329            OSMANYA,
4330            CYPRIOT,
4331            IMPERIAL_ARAMAIC,
4332            PHOENICIAN,
4333            LYDIAN,
4334            MEROITIC_HIEROGLYPHS,
4335            MEROITIC_CURSIVE,
4336            KHAROSHTHI,
4337            OLD_SOUTH_ARABIAN,
4338            AVESTAN,
4339            INSCRIPTIONAL_PARTHIAN,
4340            INSCRIPTIONAL_PAHLAVI,
4341            OLD_TURKIC,
4342            ARABIC,
4343            BRAHMI,
4344            KAITHI,
4345            SORA_SOMPENG,
4346            CHAKMA,
4347            SHARADA,
4348            TAKRI,
4349            CUNEIFORM,
4350            EGYPTIAN_HIEROGLYPHS,
4351            BAMUM,
4352            MIAO,
4353            KATAKANA,
4354            HIRAGANA,
4355            COMMON,
4356            INHERITED,
4357            COMMON,
4358            INHERITED,
4359            COMMON,
4360            INHERITED,
4361            COMMON,
4362            INHERITED,
4363            COMMON,
4364            GREEK,
4365            COMMON,
4366            ARABIC,
4367            COMMON,
4368            HIRAGANA,
4369            COMMON,
4370            HAN,
4371            COMMON,
4372            INHERITED,
4373            UNKNOWN
4374        };
4375
4376        private static HashMap<String, Character.UnicodeScript> aliases;
4377        static {
4378            aliases = new HashMap<>(128);
4379            aliases.put("ARAB", ARABIC);
4380            aliases.put("ARMI", IMPERIAL_ARAMAIC);
4381            aliases.put("ARMN", ARMENIAN);
4382            aliases.put("AVST", AVESTAN);
4383            aliases.put("BALI", BALINESE);
4384            aliases.put("BAMU", BAMUM);
4385            aliases.put("BATK", BATAK);
4386            aliases.put("BENG", BENGALI);
4387            aliases.put("BOPO", BOPOMOFO);
4388            aliases.put("BRAI", BRAILLE);
4389            aliases.put("BRAH", BRAHMI);
4390            aliases.put("BUGI", BUGINESE);
4391            aliases.put("BUHD", BUHID);
4392            aliases.put("CAKM", CHAKMA);
4393            aliases.put("CANS", CANADIAN_ABORIGINAL);
4394            aliases.put("CARI", CARIAN);
4395            aliases.put("CHAM", CHAM);
4396            aliases.put("CHER", CHEROKEE);
4397            aliases.put("COPT", COPTIC);
4398            aliases.put("CPRT", CYPRIOT);
4399            aliases.put("CYRL", CYRILLIC);
4400            aliases.put("DEVA", DEVANAGARI);
4401            aliases.put("DSRT", DESERET);
4402            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4403            aliases.put("ETHI", ETHIOPIC);
4404            aliases.put("GEOR", GEORGIAN);
4405            aliases.put("GLAG", GLAGOLITIC);
4406            aliases.put("GOTH", GOTHIC);
4407            aliases.put("GREK", GREEK);
4408            aliases.put("GUJR", GUJARATI);
4409            aliases.put("GURU", GURMUKHI);
4410            aliases.put("HANG", HANGUL);
4411            aliases.put("HANI", HAN);
4412            aliases.put("HANO", HANUNOO);
4413            aliases.put("HEBR", HEBREW);
4414            aliases.put("HIRA", HIRAGANA);
4415            // it appears we don't have the KATAKANA_OR_HIRAGANA
4416            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4417            aliases.put("ITAL", OLD_ITALIC);
4418            aliases.put("JAVA", JAVANESE);
4419            aliases.put("KALI", KAYAH_LI);
4420            aliases.put("KANA", KATAKANA);
4421            aliases.put("KHAR", KHAROSHTHI);
4422            aliases.put("KHMR", KHMER);
4423            aliases.put("KNDA", KANNADA);
4424            aliases.put("KTHI", KAITHI);
4425            aliases.put("LANA", TAI_THAM);
4426            aliases.put("LAOO", LAO);
4427            aliases.put("LATN", LATIN);
4428            aliases.put("LEPC", LEPCHA);
4429            aliases.put("LIMB", LIMBU);
4430            aliases.put("LINB", LINEAR_B);
4431            aliases.put("LISU", LISU);
4432            aliases.put("LYCI", LYCIAN);
4433            aliases.put("LYDI", LYDIAN);
4434            aliases.put("MAND", MANDAIC);
4435            aliases.put("MERC", MEROITIC_CURSIVE);
4436            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4437            aliases.put("MLYM", MALAYALAM);
4438            aliases.put("MONG", MONGOLIAN);
4439            aliases.put("MTEI", MEETEI_MAYEK);
4440            aliases.put("MYMR", MYANMAR);
4441            aliases.put("NKOO", NKO);
4442            aliases.put("OGAM", OGHAM);
4443            aliases.put("OLCK", OL_CHIKI);
4444            aliases.put("ORKH", OLD_TURKIC);
4445            aliases.put("ORYA", ORIYA);
4446            aliases.put("OSMA", OSMANYA);
4447            aliases.put("PHAG", PHAGS_PA);
4448            aliases.put("PLRD", MIAO);
4449            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4450            aliases.put("PHNX", PHOENICIAN);
4451            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4452            aliases.put("RJNG", REJANG);
4453            aliases.put("RUNR", RUNIC);
4454            aliases.put("SAMR", SAMARITAN);
4455            aliases.put("SARB", OLD_SOUTH_ARABIAN);
4456            aliases.put("SAUR", SAURASHTRA);
4457            aliases.put("SHAW", SHAVIAN);
4458            aliases.put("SHRD", SHARADA);
4459            aliases.put("SINH", SINHALA);
4460            aliases.put("SORA", SORA_SOMPENG);
4461            aliases.put("SUND", SUNDANESE);
4462            aliases.put("SYLO", SYLOTI_NAGRI);
4463            aliases.put("SYRC", SYRIAC);
4464            aliases.put("TAGB", TAGBANWA);
4465            aliases.put("TALE", TAI_LE);
4466            aliases.put("TAKR", TAKRI);
4467            aliases.put("TALU", NEW_TAI_LUE);
4468            aliases.put("TAML", TAMIL);
4469            aliases.put("TAVT", TAI_VIET);
4470            aliases.put("TELU", TELUGU);
4471            aliases.put("TFNG", TIFINAGH);
4472            aliases.put("TGLG", TAGALOG);
4473            aliases.put("THAA", THAANA);
4474            aliases.put("THAI", THAI);
4475            aliases.put("TIBT", TIBETAN);
4476            aliases.put("UGAR", UGARITIC);
4477            aliases.put("VAII", VAI);
4478            aliases.put("XPEO", OLD_PERSIAN);
4479            aliases.put("XSUX", CUNEIFORM);
4480            aliases.put("YIII", YI);
4481            aliases.put("ZINH", INHERITED);
4482            aliases.put("ZYYY", COMMON);
4483            aliases.put("ZZZZ", UNKNOWN);
4484        }
4485
4486        /**
4487         * Returns the enum constant representing the Unicode script of which
4488         * the given character (Unicode code point) is assigned to.
4489         *
4490         * @param   codePoint the character (Unicode code point) in question.
4491         * @return  The {@code UnicodeScript} constant representing the
4492         *          Unicode script of which this character is assigned to.
4493         *
4494         * @exception IllegalArgumentException if the specified
4495         * {@code codePoint} is an invalid Unicode code point.
4496         * @see Character#isValidCodePoint(int)
4497         *
4498         */
4499        public static UnicodeScript of(int codePoint) {
4500            if (!isValidCodePoint(codePoint))
4501                throw new IllegalArgumentException();
4502            int type = getType(codePoint);
4503            // leave SURROGATE and PRIVATE_USE for table lookup
4504            if (type == UNASSIGNED)
4505                return UNKNOWN;
4506            int index = Arrays.binarySearch(scriptStarts, codePoint);
4507            if (index < 0)
4508                index = -index - 2;
4509            return scripts[index];
4510        }
4511
4512        /**
4513         * Returns the UnicodeScript constant with the given Unicode script
4514         * name or the script name alias. Script names and their aliases are
4515         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4516         * and PropertyValueAliases&lt;version&gt;.txt define script names
4517         * and the script name aliases for a particular version of the
4518         * standard. The {@link Character} class specifies the version of
4519         * the standard that it supports.
4520         * <p>
4521         * Character case is ignored for all of the valid script names.
4522         * The en_US locale's case mapping rules are used to provide
4523         * case-insensitive string comparisons for script name validation.
4524         * <p>
4525         *
4526         * @param scriptName A {@code UnicodeScript} name.
4527         * @return The {@code UnicodeScript} constant identified
4528         *         by {@code scriptName}
4529         * @throws IllegalArgumentException if {@code scriptName} is an
4530         *         invalid name
4531         * @throws NullPointerException if {@code scriptName} is null
4532         */
4533        public static final UnicodeScript forName(String scriptName) {
4534            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4535                                 //.replace(' ', '_'));
4536            UnicodeScript sc = aliases.get(scriptName);
4537            if (sc != null)
4538                return sc;
4539            return valueOf(scriptName);
4540        }
4541    }
4542
    /**
     * The primitive {@code char} value wrapped by this {@code Character}.
     * The field is {@code final}; it is assigned by the constructor.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L;
4552
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * Every call allocates a new object; {@link #valueOf(char)} may return
     * a cached instance instead.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4563
4564    private static class CharacterCache {
4565        private CharacterCache(){}
4566
4567        static final Character cache[] = new Character[127 + 1];
4568
4569        static {
4570            for (int i = 0; i < cache.length; i++)
4571                cache[i] = new Character((char)i);
4572        }
4573    }
4574
4575    /**
4576     * Returns a <tt>Character</tt> instance representing the specified
4577     * <tt>char</tt> value.
4578     * If a new <tt>Character</tt> instance is not required, this method
4579     * should generally be used in preference to the constructor
4580     * {@link #Character(char)}, as this method is likely to yield
4581     * significantly better space and time performance by caching
4582     * frequently requested values.
4583     *
4584     * This method will always cache values in the range {@code
4585     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4586     * cache other values outside of this range.
4587     *
4588     * @param  c a char value.
4589     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4590     * @since  1.5
4591     */
4592    public static Character valueOf(char c) {
4593        if (c <= 127) { // must cache
4594            return CharacterCache.cache[(int)c];
4595        }
4596        return new Character(c);
4597    }
4598
4599    /**
4600     * Returns the value of this {@code Character} object.
4601     * @return  the primitive {@code char} value represented by
4602     *          this object.
4603     */
4604    public char charValue() {
4605        return value;
4606    }
4607
4608    /**
4609     * Returns a hash code for this {@code Character}; equal to the result
4610     * of invoking {@code charValue()}.
4611     *
4612     * @return a hash code value for this {@code Character}
4613     */
4614    @Override
4615    public int hashCode() {
4616        return Character.hashCode(value);
4617    }
4618
4619    /**
4620     * Returns a hash code for a {@code char} value; compatible with
4621     * {@code Character.hashCode()}.
4622     *
4623     * @since 1.8
4624     *
4625     * @param value The {@code char} for which to return a hash code.
4626     * @return a hash code value for a {@code char} value.
4627     */
4628    public static int hashCode(char value) {
4629        return (int)value;
4630    }
4631
4632    /**
4633     * Compares this object against the specified object.
4634     * The result is {@code true} if and only if the argument is not
4635     * {@code null} and is a {@code Character} object that
4636     * represents the same {@code char} value as this object.
4637     *
4638     * @param   obj   the object to compare with.
4639     * @return  {@code true} if the objects are the same;
4640     *          {@code false} otherwise.
4641     */
4642    public boolean equals(Object obj) {
4643        if (obj instanceof Character) {
4644            return value == ((Character)obj).charValue();
4645        }
4646        return false;
4647    }
4648
4649    /**
4650     * Returns a {@code String} object representing this
4651     * {@code Character}'s value.  The result is a string of
4652     * length 1 whose sole component is the primitive
4653     * {@code char} value represented by this
4654     * {@code Character} object.
4655     *
4656     * @return  a string representation of this object.
4657     */
4658    public String toString() {
4659        char buf[] = {value};
4660        return String.valueOf(buf);
4661    }
4662
4663    /**
4664     * Returns a {@code String} object representing the
4665     * specified {@code char}.  The result is a string of length
4666     * 1 consisting solely of the specified {@code char}.
4667     *
4668     * @param c the {@code char} to be converted
4669     * @return the string representation of the specified {@code char}
4670     * @since 1.4
4671     */
4672    public static String toString(char c) {
4673        return String.valueOf(c);
4674    }
4675
4676    /**
4677     * Determines whether the specified code point is a valid
4678     * <a href="http://www.unicode.org/glossary/#code_point">
4679     * Unicode code point value</a>.
4680     *
4681     * @param  codePoint the Unicode code point to be tested
4682     * @return {@code true} if the specified code point value is between
4683     *         {@link #MIN_CODE_POINT} and
4684     *         {@link #MAX_CODE_POINT} inclusive;
4685     *         {@code false} otherwise.
4686     * @since  1.5
4687     */
4688    public static boolean isValidCodePoint(int codePoint) {
4689        // Optimized form of:
4690        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4691        int plane = codePoint >>> 16;
4692        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4693    }
4694
4695    /**
4696     * Determines whether the specified character (Unicode code point)
4697     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4698     * Such code points can be represented using a single {@code char}.
4699     *
4700     * @param  codePoint the character (Unicode code point) to be tested
4701     * @return {@code true} if the specified code point is between
4702     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4703     *         {@code false} otherwise.
4704     * @since  1.7
4705     */
4706    public static boolean isBmpCodePoint(int codePoint) {
4707        return codePoint >>> 16 == 0;
4708        // Optimized form of:
4709        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4710        // We consistently use logical shift (>>>) to facilitate
4711        // additional runtime optimizations.
4712    }
4713
4714    /**
4715     * Determines whether the specified character (Unicode code point)
4716     * is in the <a href="#supplementary">supplementary character</a> range.
4717     *
4718     * @param  codePoint the character (Unicode code point) to be tested
4719     * @return {@code true} if the specified code point is between
4720     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4721     *         {@link #MAX_CODE_POINT} inclusive;
4722     *         {@code false} otherwise.
4723     * @since  1.5
4724     */
4725    public static boolean isSupplementaryCodePoint(int codePoint) {
4726        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4727            && codePoint <  MAX_CODE_POINT + 1;
4728    }
4729
4730    /**
4731     * Determines if the given {@code char} value is a
4732     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4733     * Unicode high-surrogate code unit</a>
4734     * (also known as <i>leading-surrogate code unit</i>).
4735     *
4736     * <p>Such values do not represent characters by themselves,
4737     * but are used in the representation of
4738     * <a href="#supplementary">supplementary characters</a>
4739     * in the UTF-16 encoding.
4740     *
4741     * @param  ch the {@code char} value to be tested.
4742     * @return {@code true} if the {@code char} value is between
4743     *         {@link #MIN_HIGH_SURROGATE} and
4744     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4745     *         {@code false} otherwise.
4746     * @see    Character#isLowSurrogate(char)
4747     * @see    Character.UnicodeBlock#of(int)
4748     * @since  1.5
4749     */
4750    public static boolean isHighSurrogate(char ch) {
4751        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4752        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4753    }
4754
4755    /**
4756     * Determines if the given {@code char} value is a
4757     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4758     * Unicode low-surrogate code unit</a>
4759     * (also known as <i>trailing-surrogate code unit</i>).
4760     *
4761     * <p>Such values do not represent characters by themselves,
4762     * but are used in the representation of
4763     * <a href="#supplementary">supplementary characters</a>
4764     * in the UTF-16 encoding.
4765     *
4766     * @param  ch the {@code char} value to be tested.
4767     * @return {@code true} if the {@code char} value is between
4768     *         {@link #MIN_LOW_SURROGATE} and
4769     *         {@link #MAX_LOW_SURROGATE} inclusive;
4770     *         {@code false} otherwise.
4771     * @see    Character#isHighSurrogate(char)
4772     * @since  1.5
4773     */
4774    public static boolean isLowSurrogate(char ch) {
4775        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4776    }
4777
4778    /**
4779     * Determines if the given {@code char} value is a Unicode
4780     * <i>surrogate code unit</i>.
4781     *
4782     * <p>Such values do not represent characters by themselves,
4783     * but are used in the representation of
4784     * <a href="#supplementary">supplementary characters</a>
4785     * in the UTF-16 encoding.
4786     *
4787     * <p>A char value is a surrogate code unit if and only if it is either
4788     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4789     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4790     *
4791     * @param  ch the {@code char} value to be tested.
4792     * @return {@code true} if the {@code char} value is between
4793     *         {@link #MIN_SURROGATE} and
4794     *         {@link #MAX_SURROGATE} inclusive;
4795     *         {@code false} otherwise.
4796     * @since  1.7
4797     */
4798    public static boolean isSurrogate(char ch) {
4799        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4800    }
4801
4802    /**
4803     * Determines whether the specified pair of {@code char}
4804     * values is a valid
4805     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4806     * Unicode surrogate pair</a>.
4807
4808     * <p>This method is equivalent to the expression:
4809     * <blockquote><pre>{@code
4810     * isHighSurrogate(high) && isLowSurrogate(low)
4811     * }</pre></blockquote>
4812     *
4813     * @param  high the high-surrogate code value to be tested
4814     * @param  low the low-surrogate code value to be tested
4815     * @return {@code true} if the specified high and
4816     * low-surrogate code values represent a valid surrogate pair;
4817     * {@code false} otherwise.
4818     * @since  1.5
4819     */
4820    public static boolean isSurrogatePair(char high, char low) {
4821        return isHighSurrogate(high) && isLowSurrogate(low);
4822    }
4823
4824    /**
4825     * Determines the number of {@code char} values needed to
4826     * represent the specified character (Unicode code point). If the
4827     * specified character is equal to or greater than 0x10000, then
4828     * the method returns 2. Otherwise, the method returns 1.
4829     *
4830     * <p>This method doesn't validate the specified character to be a
4831     * valid Unicode code point. The caller must validate the
4832     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4833     * if necessary.
4834     *
4835     * @param   codePoint the character (Unicode code point) to be tested.
4836     * @return  2 if the character is a valid supplementary character; 1 otherwise.
4837     * @see     Character#isSupplementaryCodePoint(int)
4838     * @since   1.5
4839     */
4840    public static int charCount(int codePoint) {
4841        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4842    }
4843
4844    /**
4845     * Converts the specified surrogate pair to its supplementary code
4846     * point value. This method does not validate the specified
4847     * surrogate pair. The caller must validate it using {@link
4848     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4849     *
4850     * @param  high the high-surrogate code unit
4851     * @param  low the low-surrogate code unit
4852     * @return the supplementary code point composed from the
4853     *         specified surrogate pair.
4854     * @since  1.5
4855     */
4856    public static int toCodePoint(char high, char low) {
4857        // Optimized form of:
4858        // return ((high - MIN_HIGH_SURROGATE) << 10)
4859        //         + (low - MIN_LOW_SURROGATE)
4860        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4861        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4862                                       - (MIN_HIGH_SURROGATE << 10)
4863                                       - MIN_LOW_SURROGATE);
4864    }
4865
4866    /**
4867     * Returns the code point at the given index of the
4868     * {@code CharSequence}. If the {@code char} value at
4869     * the given index in the {@code CharSequence} is in the
4870     * high-surrogate range, the following index is less than the
4871     * length of the {@code CharSequence}, and the
4872     * {@code char} value at the following index is in the
4873     * low-surrogate range, then the supplementary code point
4874     * corresponding to this surrogate pair is returned. Otherwise,
4875     * the {@code char} value at the given index is returned.
4876     *
4877     * @param seq a sequence of {@code char} values (Unicode code
4878     * units)
4879     * @param index the index to the {@code char} values (Unicode
4880     * code units) in {@code seq} to be converted
4881     * @return the Unicode code point at the given index
4882     * @exception NullPointerException if {@code seq} is null.
4883     * @exception IndexOutOfBoundsException if the value
4884     * {@code index} is negative or not less than
4885     * {@link CharSequence#length() seq.length()}.
4886     * @since  1.5
4887     */
4888    public static int codePointAt(CharSequence seq, int index) {
4889        char c1 = seq.charAt(index);
4890        if (isHighSurrogate(c1) && ++index < seq.length()) {
4891            char c2 = seq.charAt(index);
4892            if (isLowSurrogate(c2)) {
4893                return toCodePoint(c1, c2);
4894            }
4895        }
4896        return c1;
4897    }
4898
4899    /**
4900     * Returns the code point at the given index of the
4901     * {@code char} array. If the {@code char} value at
4902     * the given index in the {@code char} array is in the
4903     * high-surrogate range, the following index is less than the
4904     * length of the {@code char} array, and the
4905     * {@code char} value at the following index is in the
4906     * low-surrogate range, then the supplementary code point
4907     * corresponding to this surrogate pair is returned. Otherwise,
4908     * the {@code char} value at the given index is returned.
4909     *
4910     * @param a the {@code char} array
4911     * @param index the index to the {@code char} values (Unicode
4912     * code units) in the {@code char} array to be converted
4913     * @return the Unicode code point at the given index
4914     * @exception NullPointerException if {@code a} is null.
4915     * @exception IndexOutOfBoundsException if the value
4916     * {@code index} is negative or not less than
4917     * the length of the {@code char} array.
4918     * @since  1.5
4919     */
4920    public static int codePointAt(char[] a, int index) {
4921        return codePointAtImpl(a, index, a.length);
4922    }
4923
4924    /**
4925     * Returns the code point at the given index of the
4926     * {@code char} array, where only array elements with
4927     * {@code index} less than {@code limit} can be used. If
4928     * the {@code char} value at the given index in the
4929     * {@code char} array is in the high-surrogate range, the
4930     * following index is less than the {@code limit}, and the
4931     * {@code char} value at the following index is in the
4932     * low-surrogate range, then the supplementary code point
4933     * corresponding to this surrogate pair is returned. Otherwise,
4934     * the {@code char} value at the given index is returned.
4935     *
4936     * @param a the {@code char} array
4937     * @param index the index to the {@code char} values (Unicode
4938     * code units) in the {@code char} array to be converted
4939     * @param limit the index after the last array element that
4940     * can be used in the {@code char} array
4941     * @return the Unicode code point at the given index
4942     * @exception NullPointerException if {@code a} is null.
4943     * @exception IndexOutOfBoundsException if the {@code index}
4944     * argument is negative or not less than the {@code limit}
4945     * argument, or if the {@code limit} argument is negative or
4946     * greater than the length of the {@code char} array.
4947     * @since  1.5
4948     */
4949    public static int codePointAt(char[] a, int index, int limit) {
4950        if (index >= limit || limit < 0 || limit > a.length) {
4951            throw new IndexOutOfBoundsException();
4952        }
4953        return codePointAtImpl(a, index, limit);
4954    }
4955
4956    // throws ArrayIndexOutOfBoundsException if index out of bounds
4957    static int codePointAtImpl(char[] a, int index, int limit) {
4958        char c1 = a[index];
4959        if (isHighSurrogate(c1) && ++index < limit) {
4960            char c2 = a[index];
4961            if (isLowSurrogate(c2)) {
4962                return toCodePoint(c1, c2);
4963            }
4964        }
4965        return c1;
4966    }
4967
4968    /**
4969     * Returns the code point preceding the given index of the
4970     * {@code CharSequence}. If the {@code char} value at
4971     * {@code (index - 1)} in the {@code CharSequence} is in
4972     * the low-surrogate range, {@code (index - 2)} is not
4973     * negative, and the {@code char} value at {@code (index - 2)}
4974     * in the {@code CharSequence} is in the
4975     * high-surrogate range, then the supplementary code point
4976     * corresponding to this surrogate pair is returned. Otherwise,
4977     * the {@code char} value at {@code (index - 1)} is
4978     * returned.
4979     *
4980     * @param seq the {@code CharSequence} instance
4981     * @param index the index following the code point that should be returned
4982     * @return the Unicode code point value before the given index.
4983     * @exception NullPointerException if {@code seq} is null.
4984     * @exception IndexOutOfBoundsException if the {@code index}
4985     * argument is less than 1 or greater than {@link
4986     * CharSequence#length() seq.length()}.
4987     * @since  1.5
4988     */
4989    public static int codePointBefore(CharSequence seq, int index) {
4990        char c2 = seq.charAt(--index);
4991        if (isLowSurrogate(c2) && index > 0) {
4992            char c1 = seq.charAt(--index);
4993            if (isHighSurrogate(c1)) {
4994                return toCodePoint(c1, c2);
4995            }
4996        }
4997        return c2;
4998    }
4999
5000    /**
5001     * Returns the code point preceding the given index of the
5002     * {@code char} array. If the {@code char} value at
5003     * {@code (index - 1)} in the {@code char} array is in
5004     * the low-surrogate range, {@code (index - 2)} is not
5005     * negative, and the {@code char} value at {@code (index - 2)}
5006     * in the {@code char} array is in the
5007     * high-surrogate range, then the supplementary code point
5008     * corresponding to this surrogate pair is returned. Otherwise,
5009     * the {@code char} value at {@code (index - 1)} is
5010     * returned.
5011     *
5012     * @param a the {@code char} array
5013     * @param index the index following the code point that should be returned
5014     * @return the Unicode code point value before the given index.
5015     * @exception NullPointerException if {@code a} is null.
5016     * @exception IndexOutOfBoundsException if the {@code index}
5017     * argument is less than 1 or greater than the length of the
5018     * {@code char} array
5019     * @since  1.5
5020     */
5021    public static int codePointBefore(char[] a, int index) {
5022        return codePointBeforeImpl(a, index, 0);
5023    }
5024
5025    /**
5026     * Returns the code point preceding the given index of the
5027     * {@code char} array, where only array elements with
5028     * {@code index} greater than or equal to {@code start}
5029     * can be used. If the {@code char} value at {@code (index - 1)}
5030     * in the {@code char} array is in the
5031     * low-surrogate range, {@code (index - 2)} is not less than
5032     * {@code start}, and the {@code char} value at
5033     * {@code (index - 2)} in the {@code char} array is in
5034     * the high-surrogate range, then the supplementary code point
5035     * corresponding to this surrogate pair is returned. Otherwise,
5036     * the {@code char} value at {@code (index - 1)} is
5037     * returned.
5038     *
5039     * @param a the {@code char} array
5040     * @param index the index following the code point that should be returned
5041     * @param start the index of the first array element in the
5042     * {@code char} array
5043     * @return the Unicode code point value before the given index.
5044     * @exception NullPointerException if {@code a} is null.
5045     * @exception IndexOutOfBoundsException if the {@code index}
5046     * argument is not greater than the {@code start} argument or
5047     * is greater than the length of the {@code char} array, or
5048     * if the {@code start} argument is negative or not less than
5049     * the length of the {@code char} array.
5050     * @since  1.5
5051     */
5052    public static int codePointBefore(char[] a, int index, int start) {
5053        if (index <= start || start < 0 || start >= a.length) {
5054            throw new IndexOutOfBoundsException();
5055        }
5056        return codePointBeforeImpl(a, index, start);
5057    }
5058
5059    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5060    static int codePointBeforeImpl(char[] a, int index, int start) {
5061        char c2 = a[--index];
5062        if (isLowSurrogate(c2) && index > start) {
5063            char c1 = a[--index];
5064            if (isHighSurrogate(c1)) {
5065                return toCodePoint(c1, c2);
5066            }
5067        }
5068        return c2;
5069    }
5070
5071    /**
5072     * Returns the leading surrogate (a
5073     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5074     * high surrogate code unit</a>) of the
5075     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5076     * surrogate pair</a>
5077     * representing the specified supplementary character (Unicode
5078     * code point) in the UTF-16 encoding.  If the specified character
5079     * is not a
5080     * <a href="Character.html#supplementary">supplementary character</a>,
5081     * an unspecified {@code char} is returned.
5082     *
5083     * <p>If
5084     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5085     * is {@code true}, then
5086     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5087     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5088     * are also always {@code true}.
5089     *
5090     * @param   codePoint a supplementary character (Unicode code point)
5091     * @return  the leading surrogate code unit used to represent the
5092     *          character in the UTF-16 encoding
5093     * @since   1.7
5094     */
5095    public static char highSurrogate(int codePoint) {
5096        return (char) ((codePoint >>> 10)
5097            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5098    }
5099
5100    /**
5101     * Returns the trailing surrogate (a
5102     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5103     * low surrogate code unit</a>) of the
5104     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5105     * surrogate pair</a>
5106     * representing the specified supplementary character (Unicode
5107     * code point) in the UTF-16 encoding.  If the specified character
5108     * is not a
5109     * <a href="Character.html#supplementary">supplementary character</a>,
5110     * an unspecified {@code char} is returned.
5111     *
5112     * <p>If
5113     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5114     * is {@code true}, then
5115     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5116     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5117     * are also always {@code true}.
5118     *
5119     * @param   codePoint a supplementary character (Unicode code point)
5120     * @return  the trailing surrogate code unit used to represent the
5121     *          character in the UTF-16 encoding
5122     * @since   1.7
5123     */
5124    public static char lowSurrogate(int codePoint) {
5125        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5126    }
5127
5128    /**
5129     * Converts the specified character (Unicode code point) to its
5130     * UTF-16 representation. If the specified code point is a BMP
5131     * (Basic Multilingual Plane or Plane 0) value, the same value is
5132     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5133     * specified code point is a supplementary character, its
5134     * surrogate values are stored in {@code dst[dstIndex]}
5135     * (high-surrogate) and {@code dst[dstIndex+1]}
5136     * (low-surrogate), and 2 is returned.
5137     *
5138     * @param  codePoint the character (Unicode code point) to be converted.
5139     * @param  dst an array of {@code char} in which the
5140     * {@code codePoint}'s UTF-16 value is stored.
5141     * @param dstIndex the start index into the {@code dst}
5142     * array where the converted value is stored.
5143     * @return 1 if the code point is a BMP code point, 2 if the
5144     * code point is a supplementary code point.
5145     * @exception IllegalArgumentException if the specified
5146     * {@code codePoint} is not a valid Unicode code point.
5147     * @exception NullPointerException if the specified {@code dst} is null.
5148     * @exception IndexOutOfBoundsException if {@code dstIndex}
5149     * is negative or not less than {@code dst.length}, or if
5150     * {@code dst} at {@code dstIndex} doesn't have enough
5151     * array element(s) to store the resulting {@code char}
5152     * value(s). (If {@code dstIndex} is equal to
5153     * {@code dst.length-1} and the specified
5154     * {@code codePoint} is a supplementary character, the
5155     * high-surrogate value is not stored in
5156     * {@code dst[dstIndex]}.)
5157     * @since  1.5
5158     */
5159    public static int toChars(int codePoint, char[] dst, int dstIndex) {
5160        if (isBmpCodePoint(codePoint)) {
5161            dst[dstIndex] = (char) codePoint;
5162            return 1;
5163        } else if (isValidCodePoint(codePoint)) {
5164            toSurrogates(codePoint, dst, dstIndex);
5165            return 2;
5166        } else {
5167            throw new IllegalArgumentException();
5168        }
5169    }
5170
5171    /**
5172     * Converts the specified character (Unicode code point) to its
5173     * UTF-16 representation stored in a {@code char} array. If
5174     * the specified code point is a BMP (Basic Multilingual Plane or
5175     * Plane 0) value, the resulting {@code char} array has
5176     * the same value as {@code codePoint}. If the specified code
5177     * point is a supplementary code point, the resulting
5178     * {@code char} array has the corresponding surrogate pair.
5179     *
5180     * @param  codePoint a Unicode code point
5181     * @return a {@code char} array having
5182     *         {@code codePoint}'s UTF-16 representation.
5183     * @exception IllegalArgumentException if the specified
5184     * {@code codePoint} is not a valid Unicode code point.
5185     * @since  1.5
5186     */
5187    public static char[] toChars(int codePoint) {
5188        if (isBmpCodePoint(codePoint)) {
5189            return new char[] { (char) codePoint };
5190        } else if (isValidCodePoint(codePoint)) {
5191            char[] result = new char[2];
5192            toSurrogates(codePoint, result, 0);
5193            return result;
5194        } else {
5195            throw new IllegalArgumentException();
5196        }
5197    }
5198
5199    static void toSurrogates(int codePoint, char[] dst, int index) {
5200        // We write elements "backwards" to guarantee all-or-nothing
5201        dst[index+1] = lowSurrogate(codePoint);
5202        dst[index] = highSurrogate(codePoint);
5203    }
5204
5205    /**
5206     * Returns the number of Unicode code points in the text range of
5207     * the specified char sequence. The text range begins at the
5208     * specified {@code beginIndex} and extends to the
5209     * {@code char} at index {@code endIndex - 1}. Thus the
5210     * length (in {@code char}s) of the text range is
5211     * {@code endIndex-beginIndex}. Unpaired surrogates within
5212     * the text range count as one code point each.
5213     *
5214     * @param seq the char sequence
5215     * @param beginIndex the index to the first {@code char} of
5216     * the text range.
5217     * @param endIndex the index after the last {@code char} of
5218     * the text range.
5219     * @return the number of Unicode code points in the specified text
5220     * range
5221     * @exception NullPointerException if {@code seq} is null.
5222     * @exception IndexOutOfBoundsException if the
5223     * {@code beginIndex} is negative, or {@code endIndex}
5224     * is larger than the length of the given sequence, or
5225     * {@code beginIndex} is larger than {@code endIndex}.
5226     * @since  1.5
5227     */
5228    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5229        int length = seq.length();
5230        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5231            throw new IndexOutOfBoundsException();
5232        }
5233        int n = endIndex - beginIndex;
5234        for (int i = beginIndex; i < endIndex; ) {
5235            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5236                isLowSurrogate(seq.charAt(i))) {
5237                n--;
5238                i++;
5239            }
5240        }
5241        return n;
5242    }
5243
5244    /**
5245     * Returns the number of Unicode code points in a subarray of the
5246     * {@code char} array argument. The {@code offset}
5247     * argument is the index of the first {@code char} of the
5248     * subarray and the {@code count} argument specifies the
5249     * length of the subarray in {@code char}s. Unpaired
5250     * surrogates within the subarray count as one code point each.
5251     *
5252     * @param a the {@code char} array
5253     * @param offset the index of the first {@code char} in the
5254     * given {@code char} array
5255     * @param count the length of the subarray in {@code char}s
5256     * @return the number of Unicode code points in the specified subarray
5257     * @exception NullPointerException if {@code a} is null.
5258     * @exception IndexOutOfBoundsException if {@code offset} or
5259     * {@code count} is negative, or if {@code offset +
5260     * count} is larger than the length of the given array.
5261     * @since  1.5
5262     */
5263    public static int codePointCount(char[] a, int offset, int count) {
5264        if (count > a.length - offset || offset < 0 || count < 0) {
5265            throw new IndexOutOfBoundsException();
5266        }
5267        return codePointCountImpl(a, offset, count);
5268    }
5269
5270    static int codePointCountImpl(char[] a, int offset, int count) {
5271        int endIndex = offset + count;
5272        int n = count;
5273        for (int i = offset; i < endIndex; ) {
5274            if (isHighSurrogate(a[i++]) && i < endIndex &&
5275                isLowSurrogate(a[i])) {
5276                n--;
5277                i++;
5278            }
5279        }
5280        return n;
5281    }
5282
5283    /**
5284     * Returns the index within the given char sequence that is offset
5285     * from the given {@code index} by {@code codePointOffset}
5286     * code points. Unpaired surrogates within the text range given by
5287     * {@code index} and {@code codePointOffset} count as
5288     * one code point each.
5289     *
5290     * @param seq the char sequence
5291     * @param index the index to be offset
5292     * @param codePointOffset the offset in code points
5293     * @return the index within the char sequence
5294     * @exception NullPointerException if {@code seq} is null.
5295     * @exception IndexOutOfBoundsException if {@code index}
5296     *   is negative or larger than the length of the char sequence,
5297     *   or if {@code codePointOffset} is positive and the
5298     *   subsequence starting with {@code index} has fewer than
5299     *   {@code codePointOffset} code points, or if
5300     *   {@code codePointOffset} is negative and the subsequence
5301     *   before {@code index} has fewer than the absolute value
5302     *   of {@code codePointOffset} code points.
5303     * @since 1.5
5304     */
5305    public static int offsetByCodePoints(CharSequence seq, int index,
5306                                         int codePointOffset) {
5307        int length = seq.length();
5308        if (index < 0 || index > length) {
5309            throw new IndexOutOfBoundsException();
5310        }
5311
5312        int x = index;
5313        if (codePointOffset >= 0) {
5314            int i;
5315            for (i = 0; x < length && i < codePointOffset; i++) {
5316                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5317                    isLowSurrogate(seq.charAt(x))) {
5318                    x++;
5319                }
5320            }
5321            if (i < codePointOffset) {
5322                throw new IndexOutOfBoundsException();
5323            }
5324        } else {
5325            int i;
5326            for (i = codePointOffset; x > 0 && i < 0; i++) {
5327                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5328                    isHighSurrogate(seq.charAt(x-1))) {
5329                    x--;
5330                }
5331            }
5332            if (i < 0) {
5333                throw new IndexOutOfBoundsException();
5334            }
5335        }
5336        return x;
5337    }
5338
5339    /**
5340     * Returns the index within the given {@code char} subarray
5341     * that is offset from the given {@code index} by
5342     * {@code codePointOffset} code points. The
5343     * {@code start} and {@code count} arguments specify a
5344     * subarray of the {@code char} array. Unpaired surrogates
5345     * within the text range given by {@code index} and
5346     * {@code codePointOffset} count as one code point each.
5347     *
5348     * @param a the {@code char} array
5349     * @param start the index of the first {@code char} of the
5350     * subarray
5351     * @param count the length of the subarray in {@code char}s
5352     * @param index the index to be offset
5353     * @param codePointOffset the offset in code points
5354     * @return the index within the subarray
5355     * @exception NullPointerException if {@code a} is null.
5356     * @exception IndexOutOfBoundsException
5357     *   if {@code start} or {@code count} is negative,
5358     *   or if {@code start + count} is larger than the length of
5359     *   the given array,
5360     *   or if {@code index} is less than {@code start} or
5361     *   larger than {@code start + count},
5362     *   or if {@code codePointOffset} is positive and the text range
5363     *   starting with {@code index} and ending with {@code start + count - 1}
5364     *   has fewer than {@code codePointOffset} code
5365     *   points,
5366     *   or if {@code codePointOffset} is negative and the text range
5367     *   starting with {@code start} and ending with {@code index - 1}
5368     *   has fewer than the absolute value of
5369     *   {@code codePointOffset} code points.
5370     * @since 1.5
5371     */
5372    public static int offsetByCodePoints(char[] a, int start, int count,
5373                                         int index, int codePointOffset) {
5374        if (count > a.length-start || start < 0 || count < 0
5375            || index < start || index > start+count) {
5376            throw new IndexOutOfBoundsException();
5377        }
5378        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5379    }
5380
5381    static int offsetByCodePointsImpl(char[]a, int start, int count,
5382                                      int index, int codePointOffset) {
5383        int x = index;
5384        if (codePointOffset >= 0) {
5385            int limit = start + count;
5386            int i;
5387            for (i = 0; x < limit && i < codePointOffset; i++) {
5388                if (isHighSurrogate(a[x++]) && x < limit &&
5389                    isLowSurrogate(a[x])) {
5390                    x++;
5391                }
5392            }
5393            if (i < codePointOffset) {
5394                throw new IndexOutOfBoundsException();
5395            }
5396        } else {
5397            int i;
5398            for (i = codePointOffset; x > start && i < 0; i++) {
5399                if (isLowSurrogate(a[--x]) && x > start &&
5400                    isHighSurrogate(a[x-1])) {
5401                    x--;
5402                }
5403            }
5404            if (i < 0) {
5405                throw new IndexOutOfBoundsException();
5406            }
5407        }
5408        return x;
5409    }
5410
5411    /**
5412     * Determines if the specified character is a lowercase character.
5413     * <p>
5414     * A character is lowercase if its general category type, provided
5415     * by {@code Character.getType(ch)}, is
5416     * {@code LOWERCASE_LETTER}, or it has contributory property
5417     * Other_Lowercase as defined by the Unicode Standard.
5418     * <p>
5419     * The following are examples of lowercase characters:
5420     * <blockquote><pre>
5421     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5422     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5423     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5424     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5425     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5426     * </pre></blockquote>
5427     * <p> Many other Unicode characters are lowercase too.
5428     *
5429     * <p><b>Note:</b> This method cannot handle <a
5430     * href="#supplementary"> supplementary characters</a>. To support
5431     * all Unicode characters, including supplementary characters, use
5432     * the {@link #isLowerCase(int)} method.
5433     *
5434     * @param   ch   the character to be tested.
5435     * @return  {@code true} if the character is lowercase;
5436     *          {@code false} otherwise.
5437     * @see     Character#isUpperCase(char)
5438     * @see     Character#isTitleCase(char)
5439     * @see     Character#toLowerCase(char)
5440     * @see     Character#getType(char)
5441     */
5442    public static boolean isLowerCase(char ch) {
5443        return isLowerCase((int)ch);
5444    }
5445
5446    /**
5447     * Determines if the specified character (Unicode code point) is a
5448     * lowercase character.
5449     * <p>
5450     * A character is lowercase if its general category type, provided
5451     * by {@link Character#getType getType(codePoint)}, is
5452     * {@code LOWERCASE_LETTER}, or it has contributory property
5453     * Other_Lowercase as defined by the Unicode Standard.
5454     * <p>
5455     * The following are examples of lowercase characters:
5456     * <blockquote><pre>
5457     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5458     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5459     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5460     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5461     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5462     * </pre></blockquote>
5463     * <p> Many other Unicode characters are lowercase too.
5464     *
5465     * @param   codePoint the character (Unicode code point) to be tested.
5466     * @return  {@code true} if the character is lowercase;
5467     *          {@code false} otherwise.
5468     * @see     Character#isUpperCase(int)
5469     * @see     Character#isTitleCase(int)
5470     * @see     Character#toLowerCase(int)
5471     * @see     Character#getType(int)
5472     * @since   1.5
5473     */
5474    public static boolean isLowerCase(int codePoint) {
5475        return isLowerCaseImpl(codePoint);
5476    }
5477
5478    static native boolean isLowerCaseImpl(int codePoint);
5479
5480    /**
5481     * Determines if the specified character is an uppercase character.
5482     * <p>
5483     * A character is uppercase if its general category type, provided by
5484     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5485     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5486     * <p>
5487     * The following are examples of uppercase characters:
5488     * <blockquote><pre>
5489     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5490     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5491     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5492     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5493     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5494     * </pre></blockquote>
5495     * <p> Many other Unicode characters are uppercase too.
5496     *
5497     * <p><b>Note:</b> This method cannot handle <a
5498     * href="#supplementary"> supplementary characters</a>. To support
5499     * all Unicode characters, including supplementary characters, use
5500     * the {@link #isUpperCase(int)} method.
5501     *
5502     * @param   ch   the character to be tested.
5503     * @return  {@code true} if the character is uppercase;
5504     *          {@code false} otherwise.
5505     * @see     Character#isLowerCase(char)
5506     * @see     Character#isTitleCase(char)
5507     * @see     Character#toUpperCase(char)
5508     * @see     Character#getType(char)
5509     * @since   1.0
5510     */
5511    public static boolean isUpperCase(char ch) {
5512        return isUpperCase((int)ch);
5513    }
5514
5515    /**
5516     * Determines if the specified character (Unicode code point) is an uppercase character.
5517     * <p>
5518     * A character is uppercase if its general category type, provided by
5519     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5520     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5521     * <p>
5522     * The following are examples of uppercase characters:
5523     * <blockquote><pre>
5524     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5525     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5526     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5527     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5528     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5529     * </pre></blockquote>
5530     * <p> Many other Unicode characters are uppercase too.
5531     *
5532     * @param   codePoint the character (Unicode code point) to be tested.
5533     * @return  {@code true} if the character is uppercase;
5534     *          {@code false} otherwise.
5535     * @see     Character#isLowerCase(int)
5536     * @see     Character#isTitleCase(int)
5537     * @see     Character#toUpperCase(int)
5538     * @see     Character#getType(int)
5539     * @since   1.5
5540     */
5541    public static boolean isUpperCase(int codePoint) {
5542        return isUpperCaseImpl(codePoint);
5543    }
5544
5545    static native boolean isUpperCaseImpl(int codePoint);
5546
5547
5548    /**
5549     * Determines if the specified character is a titlecase character.
5550     * <p>
5551     * A character is a titlecase character if its general
5552     * category type, provided by {@code Character.getType(ch)},
5553     * is {@code TITLECASE_LETTER}.
5554     * <p>
5555     * Some characters look like pairs of Latin letters. For example, there
5556     * is an uppercase letter that looks like "LJ" and has a corresponding
5557     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5558     * is the appropriate form to use when rendering a word in lowercase
5559     * with initial capitals, as for a book title.
5560     * <p>
5561     * These are some of the Unicode characters for which this method returns
5562     * {@code true}:
5563     * <ul>
5564     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5565     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5566     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5567     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5568     * </ul>
5569     * <p> Many other Unicode characters are titlecase too.
5570     *
5571     * <p><b>Note:</b> This method cannot handle <a
5572     * href="#supplementary"> supplementary characters</a>. To support
5573     * all Unicode characters, including supplementary characters, use
5574     * the {@link #isTitleCase(int)} method.
5575     *
5576     * @param   ch   the character to be tested.
5577     * @return  {@code true} if the character is titlecase;
5578     *          {@code false} otherwise.
5579     * @see     Character#isLowerCase(char)
5580     * @see     Character#isUpperCase(char)
5581     * @see     Character#toTitleCase(char)
5582     * @see     Character#getType(char)
5583     * @since   1.0.2
5584     */
5585    public static boolean isTitleCase(char ch) {
5586        return isTitleCase((int)ch);
5587    }
5588
5589    /**
5590     * Determines if the specified character (Unicode code point) is a titlecase character.
5591     * <p>
5592     * A character is a titlecase character if its general
5593     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5594     * is {@code TITLECASE_LETTER}.
5595     * <p>
5596     * Some characters look like pairs of Latin letters. For example, there
5597     * is an uppercase letter that looks like "LJ" and has a corresponding
5598     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5599     * is the appropriate form to use when rendering a word in lowercase
5600     * with initial capitals, as for a book title.
5601     * <p>
5602     * These are some of the Unicode characters for which this method returns
5603     * {@code true}:
5604     * <ul>
5605     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5606     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5607     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5608     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5609     * </ul>
5610     * <p> Many other Unicode characters are titlecase too.
5611     *
5612     * @param   codePoint the character (Unicode code point) to be tested.
5613     * @return  {@code true} if the character is titlecase;
5614     *          {@code false} otherwise.
5615     * @see     Character#isLowerCase(int)
5616     * @see     Character#isUpperCase(int)
5617     * @see     Character#toTitleCase(int)
5618     * @see     Character#getType(int)
5619     * @since   1.5
5620     */
5621    public static boolean isTitleCase(int codePoint) {
5622        return isTitleCaseImpl(codePoint);
5623    }
5624
5625    static native boolean isTitleCaseImpl(int codePoint);
5626
5627    /**
5628     * Determines if the specified character is a digit.
5629     * <p>
5630     * A character is a digit if its general category type, provided
5631     * by {@code Character.getType(ch)}, is
5632     * {@code DECIMAL_DIGIT_NUMBER}.
5633     * <p>
5634     * Some Unicode character ranges that contain digits:
5635     * <ul>
5636     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5637     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5638     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5639     *     Arabic-Indic digits
5640     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5641     *     Extended Arabic-Indic digits
5642     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5643     *     Devanagari digits
5644     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5645     *     Fullwidth digits
5646     * </ul>
5647     *
5648     * Many other character ranges contain digits as well.
5649     *
5650     * <p><b>Note:</b> This method cannot handle <a
5651     * href="#supplementary"> supplementary characters</a>. To support
5652     * all Unicode characters, including supplementary characters, use
5653     * the {@link #isDigit(int)} method.
5654     *
5655     * @param   ch   the character to be tested.
5656     * @return  {@code true} if the character is a digit;
5657     *          {@code false} otherwise.
5658     * @see     Character#digit(char, int)
5659     * @see     Character#forDigit(int, int)
5660     * @see     Character#getType(char)
5661     */
5662    public static boolean isDigit(char ch) {
5663        return isDigit((int)ch);
5664    }
5665
5666    /**
5667     * Determines if the specified character (Unicode code point) is a digit.
5668     * <p>
5669     * A character is a digit if its general category type, provided
5670     * by {@link Character#getType(int) getType(codePoint)}, is
5671     * {@code DECIMAL_DIGIT_NUMBER}.
5672     * <p>
5673     * Some Unicode character ranges that contain digits:
5674     * <ul>
5675     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5676     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5677     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5678     *     Arabic-Indic digits
5679     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5680     *     Extended Arabic-Indic digits
5681     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5682     *     Devanagari digits
5683     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5684     *     Fullwidth digits
5685     * </ul>
5686     *
5687     * Many other character ranges contain digits as well.
5688     *
5689     * @param   codePoint the character (Unicode code point) to be tested.
5690     * @return  {@code true} if the character is a digit;
5691     *          {@code false} otherwise.
5692     * @see     Character#forDigit(int, int)
5693     * @see     Character#getType(int)
5694     * @since   1.5
5695     */
5696    public static boolean isDigit(int codePoint) {
5697        return isDigitImpl(codePoint);
5698    }
5699
5700    static native boolean isDigitImpl(int codePoint);
5701
5702    /**
5703     * Determines if a character is defined in Unicode.
5704     * <p>
5705     * A character is defined if at least one of the following is true:
5706     * <ul>
5707     * <li>It has an entry in the UnicodeData file.
5708     * <li>It has a value in a range defined by the UnicodeData file.
5709     * </ul>
5710     *
5711     * <p><b>Note:</b> This method cannot handle <a
5712     * href="#supplementary"> supplementary characters</a>. To support
5713     * all Unicode characters, including supplementary characters, use
5714     * the {@link #isDefined(int)} method.
5715     *
5716     * @param   ch   the character to be tested
5717     * @return  {@code true} if the character has a defined meaning
5718     *          in Unicode; {@code false} otherwise.
5719     * @see     Character#isDigit(char)
5720     * @see     Character#isLetter(char)
5721     * @see     Character#isLetterOrDigit(char)
5722     * @see     Character#isLowerCase(char)
5723     * @see     Character#isTitleCase(char)
5724     * @see     Character#isUpperCase(char)
5725     * @since   1.0.2
5726     */
5727    public static boolean isDefined(char ch) {
5728        return isDefined((int)ch);
5729    }
5730
5731    /**
5732     * Determines if a character (Unicode code point) is defined in Unicode.
5733     * <p>
5734     * A character is defined if at least one of the following is true:
5735     * <ul>
5736     * <li>It has an entry in the UnicodeData file.
5737     * <li>It has a value in a range defined by the UnicodeData file.
5738     * </ul>
5739     *
5740     * @param   codePoint the character (Unicode code point) to be tested.
5741     * @return  {@code true} if the character has a defined meaning
5742     *          in Unicode; {@code false} otherwise.
5743     * @see     Character#isDigit(int)
5744     * @see     Character#isLetter(int)
5745     * @see     Character#isLetterOrDigit(int)
5746     * @see     Character#isLowerCase(int)
5747     * @see     Character#isTitleCase(int)
5748     * @see     Character#isUpperCase(int)
5749     * @since   1.5
5750     */
5751    public static boolean isDefined(int codePoint) {
5752        return isDefinedImpl(codePoint);
5753    }
5754
5755    static native boolean isDefinedImpl(int codePoint);
5756
5757    /**
5758     * Determines if the specified character is a letter.
5759     * <p>
5760     * A character is considered to be a letter if its general
5761     * category type, provided by {@code Character.getType(ch)},
5762     * is any of the following:
5763     * <ul>
5764     * <li> {@code UPPERCASE_LETTER}
5765     * <li> {@code LOWERCASE_LETTER}
5766     * <li> {@code TITLECASE_LETTER}
5767     * <li> {@code MODIFIER_LETTER}
5768     * <li> {@code OTHER_LETTER}
5769     * </ul>
5770     *
5771     * Not all letters have case. Many characters are
5772     * letters but are neither uppercase nor lowercase nor titlecase.
5773     *
5774     * <p><b>Note:</b> This method cannot handle <a
5775     * href="#supplementary"> supplementary characters</a>. To support
5776     * all Unicode characters, including supplementary characters, use
5777     * the {@link #isLetter(int)} method.
5778     *
5779     * @param   ch   the character to be tested.
5780     * @return  {@code true} if the character is a letter;
5781     *          {@code false} otherwise.
5782     * @see     Character#isDigit(char)
5783     * @see     Character#isJavaIdentifierStart(char)
5784     * @see     Character#isJavaLetter(char)
5785     * @see     Character#isJavaLetterOrDigit(char)
5786     * @see     Character#isLetterOrDigit(char)
5787     * @see     Character#isLowerCase(char)
5788     * @see     Character#isTitleCase(char)
5789     * @see     Character#isUnicodeIdentifierStart(char)
5790     * @see     Character#isUpperCase(char)
5791     */
5792    public static boolean isLetter(char ch) {
5793        return isLetter((int)ch);
5794    }
5795
5796    /**
5797     * Determines if the specified character (Unicode code point) is a letter.
5798     * <p>
5799     * A character is considered to be a letter if its general
5800     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5801     * is any of the following:
5802     * <ul>
5803     * <li> {@code UPPERCASE_LETTER}
5804     * <li> {@code LOWERCASE_LETTER}
5805     * <li> {@code TITLECASE_LETTER}
5806     * <li> {@code MODIFIER_LETTER}
5807     * <li> {@code OTHER_LETTER}
5808     * </ul>
5809     *
5810     * Not all letters have case. Many characters are
5811     * letters but are neither uppercase nor lowercase nor titlecase.
5812     *
5813     * @param   codePoint the character (Unicode code point) to be tested.
5814     * @return  {@code true} if the character is a letter;
5815     *          {@code false} otherwise.
5816     * @see     Character#isDigit(int)
5817     * @see     Character#isJavaIdentifierStart(int)
5818     * @see     Character#isLetterOrDigit(int)
5819     * @see     Character#isLowerCase(int)
5820     * @see     Character#isTitleCase(int)
5821     * @see     Character#isUnicodeIdentifierStart(int)
5822     * @see     Character#isUpperCase(int)
5823     * @since   1.5
5824     */
5825    public static boolean isLetter(int codePoint) {
5826        return isLetterImpl(codePoint);
5827    }
5828
5829    static native boolean isLetterImpl(int codePoint);
5830
5831    /**
5832     * Determines if the specified character is a letter or digit.
5833     * <p>
5834     * A character is considered to be a letter or digit if either
5835     * {@code Character.isLetter(char ch)} or
5836     * {@code Character.isDigit(char ch)} returns
5837     * {@code true} for the character.
5838     *
5839     * <p><b>Note:</b> This method cannot handle <a
5840     * href="#supplementary"> supplementary characters</a>. To support
5841     * all Unicode characters, including supplementary characters, use
5842     * the {@link #isLetterOrDigit(int)} method.
5843     *
5844     * @param   ch   the character to be tested.
5845     * @return  {@code true} if the character is a letter or digit;
5846     *          {@code false} otherwise.
5847     * @see     Character#isDigit(char)
5848     * @see     Character#isJavaIdentifierPart(char)
5849     * @see     Character#isJavaLetter(char)
5850     * @see     Character#isJavaLetterOrDigit(char)
5851     * @see     Character#isLetter(char)
5852     * @see     Character#isUnicodeIdentifierPart(char)
5853     * @since   1.0.2
5854     */
5855    public static boolean isLetterOrDigit(char ch) {
5856        return isLetterOrDigit((int)ch);
5857    }
5858
5859    /**
5860     * Determines if the specified character (Unicode code point) is a letter or digit.
5861     * <p>
5862     * A character is considered to be a letter or digit if either
5863     * {@link #isLetter(int) isLetter(codePoint)} or
5864     * {@link #isDigit(int) isDigit(codePoint)} returns
5865     * {@code true} for the character.
5866     *
5867     * @param   codePoint the character (Unicode code point) to be tested.
5868     * @return  {@code true} if the character is a letter or digit;
5869     *          {@code false} otherwise.
5870     * @see     Character#isDigit(int)
5871     * @see     Character#isJavaIdentifierPart(int)
5872     * @see     Character#isLetter(int)
5873     * @see     Character#isUnicodeIdentifierPart(int)
5874     * @since   1.5
5875     */
5876    public static boolean isLetterOrDigit(int codePoint) {
5877        return isLetterOrDigitImpl(codePoint);
5878    }
5879
5880    static native boolean isLetterOrDigitImpl(int codePoint);
5881
5882    /**
5883     * Determines if the specified character is permissible as the first
5884     * character in a Java identifier.
5885     * <p>
5886     * A character may start a Java identifier if and only if
5887     * one of the following is true:
5888     * <ul>
5889     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5890     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5891     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5892     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5893     * </ul>
5894     *
5895     * @param   ch the character to be tested.
5896     * @return  {@code true} if the character may start a Java
5897     *          identifier; {@code false} otherwise.
5898     * @see     Character#isJavaLetterOrDigit(char)
5899     * @see     Character#isJavaIdentifierStart(char)
5900     * @see     Character#isJavaIdentifierPart(char)
5901     * @see     Character#isLetter(char)
5902     * @see     Character#isLetterOrDigit(char)
5903     * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5905     * @deprecated Replaced by isJavaIdentifierStart(char).
5906     */
5907    @Deprecated
5908    public static boolean isJavaLetter(char ch) {
5909        return isJavaIdentifierStart(ch);
5910    }
5911
5912    /**
5913     * Determines if the specified character may be part of a Java
5914     * identifier as other than the first character.
5915     * <p>
5916     * A character may be part of a Java identifier if and only if any
5917     * of the following are true:
5918     * <ul>
5919     * <li>  it is a letter
5920     * <li>  it is a currency symbol (such as {@code '$'})
5921     * <li>  it is a connecting punctuation character (such as {@code '_'})
5922     * <li>  it is a digit
5923     * <li>  it is a numeric letter (such as a Roman numeral character)
5924     * <li>  it is a combining mark
5925     * <li>  it is a non-spacing mark
5926     * <li> {@code isIdentifierIgnorable} returns
5927     * {@code true} for the character.
5928     * </ul>
5929     *
5930     * @param   ch the character to be tested.
5931     * @return  {@code true} if the character may be part of a
5932     *          Java identifier; {@code false} otherwise.
5933     * @see     Character#isJavaLetter(char)
5934     * @see     Character#isJavaIdentifierStart(char)
5935     * @see     Character#isJavaIdentifierPart(char)
5936     * @see     Character#isLetter(char)
5937     * @see     Character#isLetterOrDigit(char)
5938     * @see     Character#isUnicodeIdentifierPart(char)
5939     * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5941     * @deprecated Replaced by isJavaIdentifierPart(char).
5942     */
5943    @Deprecated
5944    public static boolean isJavaLetterOrDigit(char ch) {
5945        return isJavaIdentifierPart(ch);
5946    }
5947
5948    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5950     * <p>
5951     * A character is considered to be alphabetic if its general category type,
5952     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5953     * the following:
5954     * <ul>
5955     * <li> <code>UPPERCASE_LETTER</code>
5956     * <li> <code>LOWERCASE_LETTER</code>
5957     * <li> <code>TITLECASE_LETTER</code>
5958     * <li> <code>MODIFIER_LETTER</code>
5959     * <li> <code>OTHER_LETTER</code>
5960     * <li> <code>LETTER_NUMBER</code>
5961     * </ul>
5962     * or it has contributory property Other_Alphabetic as defined by the
5963     * Unicode Standard.
5964     *
5965     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
5968     * @since   1.7
5969     */
5970    public static boolean isAlphabetic(int codePoint) {
5971        return isAlphabeticImpl(codePoint);
5972    }
5973
5974    static native boolean isAlphabeticImpl(int codePoint);
5975
5976
5977    /**
5978     * Determines if the specified character (Unicode code point) is a CJKV
5979     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5980     * the Unicode Standard.
5981     *
5982     * @param   codePoint the character (Unicode code point) to be tested.
5983     * @return  <code>true</code> if the character is a Unicode ideograph
5984     *          character, <code>false</code> otherwise.
5985     * @since   1.7
5986     */
5987    public static boolean isIdeographic(int codePoint) {
5988        return isIdeographicImpl(codePoint);
5989    }
5990    static native boolean isIdeographicImpl(int codePoint);
5991
5992    /**
5993     * Determines if the specified character is
5994     * permissible as the first character in a Java identifier.
5995     * <p>
5996     * A character may start a Java identifier if and only if
5997     * one of the following conditions is true:
5998     * <ul>
5999     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6000     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6001     * <li> {@code ch} is a currency symbol (such as {@code '$'})
6002     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6003     * </ul>
6004     *
6005     * <p><b>Note:</b> This method cannot handle <a
6006     * href="#supplementary"> supplementary characters</a>. To support
6007     * all Unicode characters, including supplementary characters, use
6008     * the {@link #isJavaIdentifierStart(int)} method.
6009     *
6010     * @param   ch the character to be tested.
6011     * @return  {@code true} if the character may start a Java identifier;
6012     *          {@code false} otherwise.
6013     * @see     Character#isJavaIdentifierPart(char)
6014     * @see     Character#isLetter(char)
6015     * @see     Character#isUnicodeIdentifierStart(char)
6016     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6017     * @since   1.1
6018     */
6019    public static boolean isJavaIdentifierStart(char ch) {
6020        return isJavaIdentifierStart((int)ch);
6021    }
6022
6023    /**
6024     * Determines if the character (Unicode code point) is
6025     * permissible as the first character in a Java identifier.
6026     * <p>
6027     * A character may start a Java identifier if and only if
6028     * one of the following conditions is true:
6029     * <ul>
6030     * <li> {@link #isLetter(int) isLetter(codePoint)}
6031     *      returns {@code true}
6032     * <li> {@link #getType(int) getType(codePoint)}
6033     *      returns {@code LETTER_NUMBER}
6034     * <li> the referenced character is a currency symbol (such as {@code '$'})
6035     * <li> the referenced character is a connecting punctuation character
6036     *      (such as {@code '_'}).
6037     * </ul>
6038     *
6039     * @param   codePoint the character (Unicode code point) to be tested.
6040     * @return  {@code true} if the character may start a Java identifier;
6041     *          {@code false} otherwise.
6042     * @see     Character#isJavaIdentifierPart(int)
6043     * @see     Character#isLetter(int)
6044     * @see     Character#isUnicodeIdentifierStart(int)
6045     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6046     * @since   1.5
6047     */
6048    public static boolean isJavaIdentifierStart(int codePoint) {
6049        // Use precomputed bitmasks to optimize the ASCII range.
6050        if (codePoint < 64) {
6051            return (codePoint == '$'); // There's only one character in this range.
6052        } else if (codePoint < 128) {
6053            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6054        }
6055        return ((1 << getType(codePoint))
6056                & ((1 << UPPERCASE_LETTER)
6057                   | (1 << LOWERCASE_LETTER)
6058                   | (1  << TITLECASE_LETTER)
6059                   | (1  << MODIFIER_LETTER)
6060                   | (1  << OTHER_LETTER)
6061                   | (1  << CURRENCY_SYMBOL)
6062                   | (1  << CONNECTOR_PUNCTUATION)
6063                   | (1  << LETTER_NUMBER))) != 0;
6064    }
6065
6066    /**
6067     * Determines if the specified character may be part of a Java
6068     * identifier as other than the first character.
6069     * <p>
6070     * A character may be part of a Java identifier if any of the following
6071     * are true:
6072     * <ul>
6073     * <li>  it is a letter
6074     * <li>  it is a currency symbol (such as {@code '$'})
6075     * <li>  it is a connecting punctuation character (such as {@code '_'})
6076     * <li>  it is a digit
6077     * <li>  it is a numeric letter (such as a Roman numeral character)
6078     * <li>  it is a combining mark
6079     * <li>  it is a non-spacing mark
6080     * <li> {@code isIdentifierIgnorable} returns
6081     * {@code true} for the character
6082     * </ul>
6083     *
6084     * <p><b>Note:</b> This method cannot handle <a
6085     * href="#supplementary"> supplementary characters</a>. To support
6086     * all Unicode characters, including supplementary characters, use
6087     * the {@link #isJavaIdentifierPart(int)} method.
6088     *
6089     * @param   ch      the character to be tested.
6090     * @return {@code true} if the character may be part of a
6091     *          Java identifier; {@code false} otherwise.
6092     * @see     Character#isIdentifierIgnorable(char)
6093     * @see     Character#isJavaIdentifierStart(char)
6094     * @see     Character#isLetterOrDigit(char)
6095     * @see     Character#isUnicodeIdentifierPart(char)
6096     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6097     * @since   1.1
6098     */
6099    public static boolean isJavaIdentifierPart(char ch) {
6100        return isJavaIdentifierPart((int)ch);
6101    }
6102
6103    /**
6104     * Determines if the character (Unicode code point) may be part of a Java
6105     * identifier as other than the first character.
6106     * <p>
6107     * A character may be part of a Java identifier if any of the following
6108     * are true:
6109     * <ul>
6110     * <li>  it is a letter
6111     * <li>  it is a currency symbol (such as {@code '$'})
6112     * <li>  it is a connecting punctuation character (such as {@code '_'})
6113     * <li>  it is a digit
6114     * <li>  it is a numeric letter (such as a Roman numeral character)
6115     * <li>  it is a combining mark
6116     * <li>  it is a non-spacing mark
6117     * <li> {@link #isIdentifierIgnorable(int)
6118     * isIdentifierIgnorable(codePoint)} returns {@code true} for
6119     * the character
6120     * </ul>
6121     *
6122     * @param   codePoint the character (Unicode code point) to be tested.
6123     * @return {@code true} if the character may be part of a
6124     *          Java identifier; {@code false} otherwise.
6125     * @see     Character#isIdentifierIgnorable(int)
6126     * @see     Character#isJavaIdentifierStart(int)
6127     * @see     Character#isLetterOrDigit(int)
6128     * @see     Character#isUnicodeIdentifierPart(int)
6129     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6130     * @since   1.5
6131     */
6132    public static boolean isJavaIdentifierPart(int codePoint) {
6133        // Use precomputed bitmasks to optimize the ASCII range.
6134        if (codePoint < 64) {
6135            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
6136        } else if (codePoint < 128) {
6137            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6138        }
6139        return ((1 << getType(codePoint))
6140                & ((1 << UPPERCASE_LETTER)
6141                   | (1 << LOWERCASE_LETTER)
6142                   | (1 << TITLECASE_LETTER)
6143                   | (1 << MODIFIER_LETTER)
6144                   | (1 << OTHER_LETTER)
6145                   | (1 << CURRENCY_SYMBOL)
6146                   | (1 << CONNECTOR_PUNCTUATION)
6147                   | (1 << DECIMAL_DIGIT_NUMBER)
6148                   | (1 << LETTER_NUMBER)
6149                   | (1 << FORMAT)
6150                   | (1 << COMBINING_SPACING_MARK)
6151                   | (1 << NON_SPACING_MARK))) != 0
6152                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
6153                || (codePoint >= 0x7f && codePoint <= 0x9f);
6154    }
6155
6156    /**
6157     * Determines if the specified character is permissible as the
6158     * first character in a Unicode identifier.
6159     * <p>
6160     * A character may start a Unicode identifier if and only if
6161     * one of the following conditions is true:
6162     * <ul>
6163     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6164     * <li> {@link #getType(char) getType(ch)} returns
6165     *      {@code LETTER_NUMBER}.
6166     * </ul>
6167     *
6168     * <p><b>Note:</b> This method cannot handle <a
6169     * href="#supplementary"> supplementary characters</a>. To support
6170     * all Unicode characters, including supplementary characters, use
6171     * the {@link #isUnicodeIdentifierStart(int)} method.
6172     *
6173     * @param   ch      the character to be tested.
6174     * @return  {@code true} if the character may start a Unicode
6175     *          identifier; {@code false} otherwise.
6176     * @see     Character#isJavaIdentifierStart(char)
6177     * @see     Character#isLetter(char)
6178     * @see     Character#isUnicodeIdentifierPart(char)
6179     * @since   1.1
6180     */
6181    public static boolean isUnicodeIdentifierStart(char ch) {
6182        return isUnicodeIdentifierStart((int)ch);
6183    }
6184
6185    /**
6186     * Determines if the specified character (Unicode code point) is permissible as the
6187     * first character in a Unicode identifier.
6188     * <p>
6189     * A character may start a Unicode identifier if and only if
6190     * one of the following conditions is true:
6191     * <ul>
6192     * <li> {@link #isLetter(int) isLetter(codePoint)}
6193     *      returns {@code true}
6194     * <li> {@link #getType(int) getType(codePoint)}
6195     *      returns {@code LETTER_NUMBER}.
6196     * </ul>
6197     * @param   codePoint the character (Unicode code point) to be tested.
6198     * @return  {@code true} if the character may start a Unicode
6199     *          identifier; {@code false} otherwise.
6200     * @see     Character#isJavaIdentifierStart(int)
6201     * @see     Character#isLetter(int)
6202     * @see     Character#isUnicodeIdentifierPart(int)
6203     * @since   1.5
6204     */
6205    public static boolean isUnicodeIdentifierStart(int codePoint) {
6206        return isUnicodeIdentifierStartImpl(codePoint);
6207    }
6208
6209    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6210
6211    /**
6212     * Determines if the specified character may be part of a Unicode
6213     * identifier as other than the first character.
6214     * <p>
6215     * A character may be part of a Unicode identifier if and only if
6216     * one of the following statements is true:
6217     * <ul>
6218     * <li>  it is a letter
6219     * <li>  it is a connecting punctuation character (such as {@code '_'})
6220     * <li>  it is a digit
6221     * <li>  it is a numeric letter (such as a Roman numeral character)
6222     * <li>  it is a combining mark
6223     * <li>  it is a non-spacing mark
6224     * <li> {@code isIdentifierIgnorable} returns
6225     * {@code true} for this character.
6226     * </ul>
6227     *
6228     * <p><b>Note:</b> This method cannot handle <a
6229     * href="#supplementary"> supplementary characters</a>. To support
6230     * all Unicode characters, including supplementary characters, use
6231     * the {@link #isUnicodeIdentifierPart(int)} method.
6232     *
6233     * @param   ch      the character to be tested.
6234     * @return  {@code true} if the character may be part of a
6235     *          Unicode identifier; {@code false} otherwise.
6236     * @see     Character#isIdentifierIgnorable(char)
6237     * @see     Character#isJavaIdentifierPart(char)
6238     * @see     Character#isLetterOrDigit(char)
6239     * @see     Character#isUnicodeIdentifierStart(char)
6240     * @since   1.1
6241     */
6242    public static boolean isUnicodeIdentifierPart(char ch) {
6243        return isUnicodeIdentifierPart((int)ch);
6244    }
6245
6246    /**
6247     * Determines if the specified character (Unicode code point) may be part of a Unicode
6248     * identifier as other than the first character.
6249     * <p>
6250     * A character may be part of a Unicode identifier if and only if
6251     * one of the following statements is true:
6252     * <ul>
6253     * <li>  it is a letter
6254     * <li>  it is a connecting punctuation character (such as {@code '_'})
6255     * <li>  it is a digit
6256     * <li>  it is a numeric letter (such as a Roman numeral character)
6257     * <li>  it is a combining mark
6258     * <li>  it is a non-spacing mark
6259     * <li> {@code isIdentifierIgnorable} returns
6260     * {@code true} for this character.
6261     * </ul>
6262     * @param   codePoint the character (Unicode code point) to be tested.
6263     * @return  {@code true} if the character may be part of a
6264     *          Unicode identifier; {@code false} otherwise.
6265     * @see     Character#isIdentifierIgnorable(int)
6266     * @see     Character#isJavaIdentifierPart(int)
6267     * @see     Character#isLetterOrDigit(int)
6268     * @see     Character#isUnicodeIdentifierStart(int)
6269     * @since   1.5
6270     */
6271    public static boolean isUnicodeIdentifierPart(int codePoint) {
6272        return isUnicodeIdentifierPartImpl(codePoint);
6273    }
6274
6275    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6276
6277    /**
6278     * Determines if the specified character should be regarded as
6279     * an ignorable character in a Java identifier or a Unicode identifier.
6280     * <p>
6281     * The following Unicode characters are ignorable in a Java identifier
6282     * or a Unicode identifier:
6283     * <ul>
6284     * <li>ISO control characters that are not whitespace
6285     * <ul>
6286     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6287     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6288     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6289     * </ul>
6290     *
6291     * <li>all characters that have the {@code FORMAT} general
6292     * category value
6293     * </ul>
6294     *
6295     * <p><b>Note:</b> This method cannot handle <a
6296     * href="#supplementary"> supplementary characters</a>. To support
6297     * all Unicode characters, including supplementary characters, use
6298     * the {@link #isIdentifierIgnorable(int)} method.
6299     *
6300     * @param   ch      the character to be tested.
6301     * @return  {@code true} if the character is an ignorable control
6302     *          character that may be part of a Java or Unicode identifier;
6303     *           {@code false} otherwise.
6304     * @see     Character#isJavaIdentifierPart(char)
6305     * @see     Character#isUnicodeIdentifierPart(char)
6306     * @since   1.1
6307     */
6308    public static boolean isIdentifierIgnorable(char ch) {
6309        return isIdentifierIgnorable((int)ch);
6310    }
6311
6312    /**
6313     * Determines if the specified character (Unicode code point) should be regarded as
6314     * an ignorable character in a Java identifier or a Unicode identifier.
6315     * <p>
6316     * The following Unicode characters are ignorable in a Java identifier
6317     * or a Unicode identifier:
6318     * <ul>
6319     * <li>ISO control characters that are not whitespace
6320     * <ul>
6321     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6322     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6323     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6324     * </ul>
6325     *
6326     * <li>all characters that have the {@code FORMAT} general
6327     * category value
6328     * </ul>
6329     *
6330     * @param   codePoint the character (Unicode code point) to be tested.
6331     * @return  {@code true} if the character is an ignorable control
6332     *          character that may be part of a Java or Unicode identifier;
6333     *          {@code false} otherwise.
6334     * @see     Character#isJavaIdentifierPart(int)
6335     * @see     Character#isUnicodeIdentifierPart(int)
6336     * @since   1.5
6337     */
6338    public static boolean isIdentifierIgnorable(int codePoint) {
6339        return isIdentifierIgnorableImpl(codePoint);
6340    }
6341
6342    static native boolean isIdentifierIgnorableImpl(int codePoint);
6343
6344    /**
6345     * Converts the character argument to lowercase using case
6346     * mapping information from the UnicodeData file.
6347     * <p>
6348     * Note that
6349     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6350     * does not always return {@code true} for some ranges of
6351     * characters, particularly those that are symbols or ideographs.
6352     *
6353     * <p>In general, {@link String#toLowerCase()} should be used to map
6354     * characters to lowercase. {@code String} case mapping methods
6355     * have several benefits over {@code Character} case mapping methods.
6356     * {@code String} case mapping methods can perform locale-sensitive
6357     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6358     * the {@code Character} case mapping methods cannot.
6359     *
6360     * <p><b>Note:</b> This method cannot handle <a
6361     * href="#supplementary"> supplementary characters</a>. To support
6362     * all Unicode characters, including supplementary characters, use
6363     * the {@link #toLowerCase(int)} method.
6364     *
6365     * @param   ch   the character to be converted.
6366     * @return  the lowercase equivalent of the character, if any;
6367     *          otherwise, the character itself.
6368     * @see     Character#isLowerCase(char)
6369     * @see     String#toLowerCase()
6370     */
6371    public static char toLowerCase(char ch) {
6372        return (char)toLowerCase((int)ch);
6373    }
6374
6375    /**
6376     * Converts the character (Unicode code point) argument to
6377     * lowercase using case mapping information from the UnicodeData
6378     * file.
6379     *
6380     * <p> Note that
6381     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6382     * does not always return {@code true} for some ranges of
6383     * characters, particularly those that are symbols or ideographs.
6384     *
6385     * <p>In general, {@link String#toLowerCase()} should be used to map
6386     * characters to lowercase. {@code String} case mapping methods
6387     * have several benefits over {@code Character} case mapping methods.
6388     * {@code String} case mapping methods can perform locale-sensitive
6389     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6390     * the {@code Character} case mapping methods cannot.
6391     *
6392     * @param   codePoint   the character (Unicode code point) to be converted.
6393     * @return  the lowercase equivalent of the character (Unicode code
6394     *          point), if any; otherwise, the character itself.
6395     * @see     Character#isLowerCase(int)
6396     * @see     String#toLowerCase()
6397     *
6398     * @since   1.5
6399     */
6400    public static int toLowerCase(int codePoint) {
6401        return toLowerCaseImpl(codePoint);
6402    }
6403
6404    static native int toLowerCaseImpl(int codePoint);
6405
6406    /**
6407     * Converts the character argument to uppercase using case mapping
6408     * information from the UnicodeData file.
6409     * <p>
6410     * Note that
6411     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6412     * does not always return {@code true} for some ranges of
6413     * characters, particularly those that are symbols or ideographs.
6414     *
6415     * <p>In general, {@link String#toUpperCase()} should be used to map
6416     * characters to uppercase. {@code String} case mapping methods
6417     * have several benefits over {@code Character} case mapping methods.
6418     * {@code String} case mapping methods can perform locale-sensitive
6419     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6420     * the {@code Character} case mapping methods cannot.
6421     *
6422     * <p><b>Note:</b> This method cannot handle <a
6423     * href="#supplementary"> supplementary characters</a>. To support
6424     * all Unicode characters, including supplementary characters, use
6425     * the {@link #toUpperCase(int)} method.
6426     *
6427     * @param   ch   the character to be converted.
6428     * @return  the uppercase equivalent of the character, if any;
6429     *          otherwise, the character itself.
6430     * @see     Character#isUpperCase(char)
6431     * @see     String#toUpperCase()
6432     */
6433    public static char toUpperCase(char ch) {
6434        return (char)toUpperCase((int)ch);
6435    }
6436
6437    /**
6438     * Converts the character (Unicode code point) argument to
6439     * uppercase using case mapping information from the UnicodeData
6440     * file.
6441     *
6442     * <p>Note that
6443     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6444     * does not always return {@code true} for some ranges of
6445     * characters, particularly those that are symbols or ideographs.
6446     *
6447     * <p>In general, {@link String#toUpperCase()} should be used to map
6448     * characters to uppercase. {@code String} case mapping methods
6449     * have several benefits over {@code Character} case mapping methods.
6450     * {@code String} case mapping methods can perform locale-sensitive
6451     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6452     * the {@code Character} case mapping methods cannot.
6453     *
6454     * @param   codePoint   the character (Unicode code point) to be converted.
6455     * @return  the uppercase equivalent of the character, if any;
6456     *          otherwise, the character itself.
6457     * @see     Character#isUpperCase(int)
6458     * @see     String#toUpperCase()
6459     *
6460     * @since   1.5
6461     */
6462    public static int toUpperCase(int codePoint) {
6463        return toUpperCaseImpl(codePoint);
6464    }
6465
    // Native counterpart of toUpperCase(int), which returns this method's result unchanged.
    // The mapping itself is implemented in native code outside this file.
    static native int toUpperCaseImpl(int codePoint);
6467
6468    /**
6469     * Converts the character argument to titlecase using case mapping
6470     * information from the UnicodeData file. If a character has no
6471     * explicit titlecase mapping and is not itself a titlecase char
6472     * according to UnicodeData, then the uppercase mapping is
6473     * returned as an equivalent titlecase mapping. If the
6474     * {@code char} argument is already a titlecase
6475     * {@code char}, the same {@code char} value will be
6476     * returned.
6477     * <p>
6478     * Note that
6479     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6480     * does not always return {@code true} for some ranges of
6481     * characters.
6482     *
6483     * <p><b>Note:</b> This method cannot handle <a
6484     * href="#supplementary"> supplementary characters</a>. To support
6485     * all Unicode characters, including supplementary characters, use
6486     * the {@link #toTitleCase(int)} method.
6487     *
6488     * @param   ch   the character to be converted.
6489     * @return  the titlecase equivalent of the character, if any;
6490     *          otherwise, the character itself.
6491     * @see     Character#isTitleCase(char)
6492     * @see     Character#toLowerCase(char)
6493     * @see     Character#toUpperCase(char)
6494     * @since   1.0.2
6495     */
6496    public static char toTitleCase(char ch) {
6497        return (char)toTitleCase((int)ch);
6498    }
6499
6500    /**
6501     * Converts the character (Unicode code point) argument to titlecase using case mapping
6502     * information from the UnicodeData file. If a character has no
6503     * explicit titlecase mapping and is not itself a titlecase char
6504     * according to UnicodeData, then the uppercase mapping is
6505     * returned as an equivalent titlecase mapping. If the
6506     * character argument is already a titlecase
6507     * character, the same character value will be
6508     * returned.
6509     *
6510     * <p>Note that
6511     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6512     * does not always return {@code true} for some ranges of
6513     * characters.
6514     *
6515     * @param   codePoint   the character (Unicode code point) to be converted.
6516     * @return  the titlecase equivalent of the character, if any;
6517     *          otherwise, the character itself.
6518     * @see     Character#isTitleCase(int)
6519     * @see     Character#toLowerCase(int)
6520     * @see     Character#toUpperCase(int)
6521     * @since   1.5
6522     */
6523    public static int toTitleCase(int codePoint) {
6524        return toTitleCaseImpl(codePoint);
6525    }
6526
    // Native counterpart of toTitleCase(int), which returns this method's result unchanged.
    // The mapping itself is implemented in native code outside this file.
    static native int toTitleCaseImpl(int codePoint);
6528
6529    /**
6530     * Returns the numeric value of the character {@code ch} in the
6531     * specified radix.
6532     * <p>
6533     * If the radix is not in the range {@code MIN_RADIX} &le;
6534     * {@code radix} &le; {@code MAX_RADIX} or if the
6535     * value of {@code ch} is not a valid digit in the specified
6536     * radix, {@code -1} is returned. A character is a valid digit
6537     * if at least one of the following is true:
6538     * <ul>
6539     * <li>The method {@code isDigit} is {@code true} of the character
6540     *     and the Unicode decimal digit value of the character (or its
6541     *     single-character decomposition) is less than the specified radix.
6542     *     In this case the decimal digit value is returned.
6543     * <li>The character is one of the uppercase Latin letters
6544     *     {@code 'A'} through {@code 'Z'} and its code is less than
6545     *     {@code radix + 'A' - 10}.
6546     *     In this case, {@code ch - 'A' + 10}
6547     *     is returned.
6548     * <li>The character is one of the lowercase Latin letters
6549     *     {@code 'a'} through {@code 'z'} and its code is less than
6550     *     {@code radix + 'a' - 10}.
6551     *     In this case, {@code ch - 'a' + 10}
6552     *     is returned.
6553     * <li>The character is one of the fullwidth uppercase Latin letters A
6554     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6555     *     and its code is less than
6556     *     {@code radix + '\u005CuFF21' - 10}.
6557     *     In this case, {@code ch - '\u005CuFF21' + 10}
6558     *     is returned.
6559     * <li>The character is one of the fullwidth lowercase Latin letters a
6560     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6561     *     and its code is less than
6562     *     {@code radix + '\u005CuFF41' - 10}.
6563     *     In this case, {@code ch - '\u005CuFF41' + 10}
6564     *     is returned.
6565     * </ul>
6566     *
6567     * <p><b>Note:</b> This method cannot handle <a
6568     * href="#supplementary"> supplementary characters</a>. To support
6569     * all Unicode characters, including supplementary characters, use
6570     * the {@link #digit(int, int)} method.
6571     *
6572     * @param   ch      the character to be converted.
6573     * @param   radix   the radix.
6574     * @return  the numeric value represented by the character in the
6575     *          specified radix.
6576     * @see     Character#forDigit(int, int)
6577     * @see     Character#isDigit(char)
6578     */
6579    public static int digit(char ch, int radix) {
6580        return digit((int)ch, radix);
6581    }
6582
6583    /**
6584     * Returns the numeric value of the specified character (Unicode
6585     * code point) in the specified radix.
6586     *
6587     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6588     * {@code radix} &le; {@code MAX_RADIX} or if the
6589     * character is not a valid digit in the specified
6590     * radix, {@code -1} is returned. A character is a valid digit
6591     * if at least one of the following is true:
6592     * <ul>
6593     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6594     *     and the Unicode decimal digit value of the character (or its
6595     *     single-character decomposition) is less than the specified radix.
6596     *     In this case the decimal digit value is returned.
6597     * <li>The character is one of the uppercase Latin letters
6598     *     {@code 'A'} through {@code 'Z'} and its code is less than
6599     *     {@code radix + 'A' - 10}.
6600     *     In this case, {@code codePoint - 'A' + 10}
6601     *     is returned.
6602     * <li>The character is one of the lowercase Latin letters
6603     *     {@code 'a'} through {@code 'z'} and its code is less than
6604     *     {@code radix + 'a' - 10}.
6605     *     In this case, {@code codePoint - 'a' + 10}
6606     *     is returned.
6607     * <li>The character is one of the fullwidth uppercase Latin letters A
6608     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6609     *     and its code is less than
6610     *     {@code radix + '\u005CuFF21' - 10}.
6611     *     In this case,
6612     *     {@code codePoint - '\u005CuFF21' + 10}
6613     *     is returned.
6614     * <li>The character is one of the fullwidth lowercase Latin letters a
6615     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6616     *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6618     *     In this case,
6619     *     {@code codePoint - '\u005CuFF41' + 10}
6620     *     is returned.
6621     * </ul>
6622     *
6623     * @param   codePoint the character (Unicode code point) to be converted.
6624     * @param   radix   the radix.
6625     * @return  the numeric value represented by the character in the
6626     *          specified radix.
6627     * @see     Character#forDigit(int, int)
6628     * @see     Character#isDigit(int)
6629     * @since   1.5
6630     */
6631    public static int digit(int codePoint, int radix) {
6632        if (radix < MIN_RADIX || radix > MAX_RADIX) {
6633            return -1;
6634        }
6635        if (codePoint < 128) {
6636            // Optimized for ASCII
6637            int result = -1;
6638            if ('0' <= codePoint && codePoint <= '9') {
6639                result = codePoint - '0';
6640            } else if ('a' <= codePoint && codePoint <= 'z') {
6641                result = 10 + (codePoint - 'a');
6642            } else if ('A' <= codePoint && codePoint <= 'Z') {
6643                result = 10 + (codePoint - 'A');
6644            }
6645            return result < radix ? result : -1;
6646        }
6647        return digitImpl(codePoint, radix);
6648    }
6649
6650    native static int digitImpl(int codePoint, int radix);
6651
6652    /**
6653     * Returns the {@code int} value that the specified Unicode
6654     * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6657     * <p>
6658     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6659     * {@code '\u005Cu005A'}), lowercase
6660     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6661     * full width variant ({@code '\u005CuFF21'} through
6662     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6663     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6664     * through 35. This is independent of the Unicode specification,
6665     * which does not assign numeric values to these {@code char}
6666     * values.
6667     * <p>
6668     * If the character does not have a numeric value, then -1 is returned.
6669     * If the character has a numeric value that cannot be represented as a
6670     * nonnegative integer (for example, a fractional value), then -2
6671     * is returned.
6672     *
6673     * <p><b>Note:</b> This method cannot handle <a
6674     * href="#supplementary"> supplementary characters</a>. To support
6675     * all Unicode characters, including supplementary characters, use
6676     * the {@link #getNumericValue(int)} method.
6677     *
6678     * @param   ch      the character to be converted.
6679     * @return  the numeric value of the character, as a nonnegative {@code int}
6680     *           value; -2 if the character has a numeric value that is not a
6681     *          nonnegative integer; -1 if the character has no numeric value.
6682     * @see     Character#forDigit(int, int)
6683     * @see     Character#isDigit(char)
6684     * @since   1.1
6685     */
6686    public static int getNumericValue(char ch) {
6687        return getNumericValue((int)ch);
6688    }
6689
6690    /**
6691     * Returns the {@code int} value that the specified
6692     * character (Unicode code point) represents. For example, the character
6693     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6694     * an {@code int} with a value of 50.
6695     * <p>
6696     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6697     * {@code '\u005Cu005A'}), lowercase
6698     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6699     * full width variant ({@code '\u005CuFF21'} through
6700     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6701     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6702     * through 35. This is independent of the Unicode specification,
6703     * which does not assign numeric values to these {@code char}
6704     * values.
6705     * <p>
6706     * If the character does not have a numeric value, then -1 is returned.
6707     * If the character has a numeric value that cannot be represented as a
6708     * nonnegative integer (for example, a fractional value), then -2
6709     * is returned.
6710     *
6711     * @param   codePoint the character (Unicode code point) to be converted.
6712     * @return  the numeric value of the character, as a nonnegative {@code int}
6713     *          value; -2 if the character has a numeric value that is not a
6714     *          nonnegative integer; -1 if the character has no numeric value.
6715     * @see     Character#forDigit(int, int)
6716     * @see     Character#isDigit(int)
6717     * @since   1.5
6718     */
6719    public static int getNumericValue(int codePoint) {
6720        // This is both an optimization and papers over differences between Java and ICU.
6721        if (codePoint < 128) {
6722            if (codePoint >= '0' && codePoint <= '9') {
6723                return codePoint - '0';
6724            }
6725            if (codePoint >= 'a' && codePoint <= 'z') {
6726                return codePoint - ('a' - 10);
6727            }
6728            if (codePoint >= 'A' && codePoint <= 'Z') {
6729                return codePoint - ('A' - 10);
6730            }
6731            return -1;
6732        }
6733        // Full-width uppercase A-Z.
6734        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6735            return codePoint - 0xff17;
6736        }
6737        // Full-width lowercase a-z.
6738        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6739            return codePoint - 0xff37;
6740        }
6741        return getNumericValueImpl(codePoint);
6742    }
6743
6744    native static int getNumericValueImpl(int codePoint);
6745
6746    /**
6747     * Determines if the specified character is ISO-LATIN-1 white space.
6748     * This method returns {@code true} for the following five
6749     * characters only:
6750     * <table summary="truechars">
6751     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6752     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6753     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6754     *     <td>{@code NEW LINE}</td></tr>
6755     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6756     *     <td>{@code FORM FEED}</td></tr>
6757     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6758     *     <td>{@code CARRIAGE RETURN}</td></tr>
6759     * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6760     *     <td>{@code SPACE}</td></tr>
6761     * </table>
6762     *
6763     * @param      ch   the character to be tested.
6764     * @return     {@code true} if the character is ISO-LATIN-1 white
6765     *             space; {@code false} otherwise.
6766     * @see        Character#isSpaceChar(char)
6767     * @see        Character#isWhitespace(char)
6768     * @deprecated Replaced by isWhitespace(char).
6769     */
6770    @Deprecated
6771    public static boolean isSpace(char ch) {
6772        return (ch <= 0x0020) &&
6773            (((((1L << 0x0009) |
6774            (1L << 0x000A) |
6775            (1L << 0x000C) |
6776            (1L << 0x000D) |
6777            (1L << 0x0020)) >> ch) & 1L) != 0);
6778    }
6779
6780
6781    /**
6782     * Determines if the specified character is a Unicode space character.
6783     * A character is considered to be a space character if and only if
6784     * it is specified to be a space character by the Unicode Standard. This
6785     * method returns true if the character's general category type is any of
6786     * the following:
6787     * <ul>
6788     * <li> {@code SPACE_SEPARATOR}
6789     * <li> {@code LINE_SEPARATOR}
6790     * <li> {@code PARAGRAPH_SEPARATOR}
6791     * </ul>
6792     *
6793     * <p><b>Note:</b> This method cannot handle <a
6794     * href="#supplementary"> supplementary characters</a>. To support
6795     * all Unicode characters, including supplementary characters, use
6796     * the {@link #isSpaceChar(int)} method.
6797     *
6798     * @param   ch      the character to be tested.
6799     * @return  {@code true} if the character is a space character;
6800     *          {@code false} otherwise.
6801     * @see     Character#isWhitespace(char)
6802     * @since   1.1
6803     */
6804    public static boolean isSpaceChar(char ch) {
6805        return isSpaceChar((int)ch);
6806    }
6807
6808    /**
6809     * Determines if the specified character (Unicode code point) is a
6810     * Unicode space character.  A character is considered to be a
6811     * space character if and only if it is specified to be a space
6812     * character by the Unicode Standard. This method returns true if
6813     * the character's general category type is any of the following:
6814     *
6815     * <ul>
6816     * <li> {@link #SPACE_SEPARATOR}
6817     * <li> {@link #LINE_SEPARATOR}
6818     * <li> {@link #PARAGRAPH_SEPARATOR}
6819     * </ul>
6820     *
6821     * @param   codePoint the character (Unicode code point) to be tested.
6822     * @return  {@code true} if the character is a space character;
6823     *          {@code false} otherwise.
6824     * @see     Character#isWhitespace(int)
6825     * @since   1.5
6826     */
6827    public static boolean isSpaceChar(int codePoint) {
6828        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6829        // SPACE or NO-BREAK SPACE?
6830        if (codePoint == 0x20 || codePoint == 0xa0) {
6831            return true;
6832        }
6833        if (codePoint < 0x1000) {
6834            return false;
6835        }
6836        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6837        if (codePoint == 0x1680 || codePoint == 0x180e) {
6838            return true;
6839        }
6840        if (codePoint < 0x2000) {
6841            return false;
6842        }
6843        if (codePoint <= 0xffff) {
6844            // Other whitespace from General Punctuation...
6845            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6846                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6847        }
6848        // Let icu4c worry about non-BMP code points.
6849        return isSpaceCharImpl(codePoint);
6850    }
6851
    // Native counterpart of isSpaceChar(int). That method only calls it for code points
    // above U+FFFF; its comments attribute the native lookup to icu4c.
    static native boolean isSpaceCharImpl(int codePoint);
6853
6854    /**
6855     * Determines if the specified character is white space according to Java.
6856     * A character is a Java whitespace character if and only if it satisfies
6857     * one of the following criteria:
6858     * <ul>
6859     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6860     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6861     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6862     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6863     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6864     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6865     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6866     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6867     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6868     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6869     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6870     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6871     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6872     * </ul>
6873     *
6874     * <p><b>Note:</b> This method cannot handle <a
6875     * href="#supplementary"> supplementary characters</a>. To support
6876     * all Unicode characters, including supplementary characters, use
6877     * the {@link #isWhitespace(int)} method.
6878     *
6879     * @param   ch the character to be tested.
6880     * @return  {@code true} if the character is a Java whitespace
6881     *          character; {@code false} otherwise.
6882     * @see     Character#isSpaceChar(char)
6883     * @since   1.1
6884     */
6885    public static boolean isWhitespace(char ch) {
6886        return isWhitespace((int)ch);
6887    }
6888
6889    /**
6890     * Determines if the specified character (Unicode code point) is
6891     * white space according to Java.  A character is a Java
6892     * whitespace character if and only if it satisfies one of the
6893     * following criteria:
6894     * <ul>
6895     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6896     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6897     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6898     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6899     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6900     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6901     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6902     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6903     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6904     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6905     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6906     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6907     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6908     * </ul>
     *
6911     * @param   codePoint the character (Unicode code point) to be tested.
6912     * @return  {@code true} if the character is a Java whitespace
6913     *          character; {@code false} otherwise.
6914     * @see     Character#isSpaceChar(int)
6915     * @since   1.5
6916     */
6917    public static boolean isWhitespace(int codePoint) {
6918        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6919        // Any ASCII whitespace character?
6920        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6921            return true;
6922        }
6923        if (codePoint < 0x1000) {
6924            return false;
6925        }
6926        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6927        if (codePoint == 0x1680 || codePoint == 0x180e) {
6928            return true;
6929        }
6930        if (codePoint < 0x2000) {
6931            return false;
6932        }
6933        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6934        if (codePoint == 0x2007 || codePoint == 0x202f) {
6935            return false;
6936        }
6937        if (codePoint <= 0xffff) {
6938            // Other whitespace from General Punctuation...
6939            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6940                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6941        }
6942        // Let icu4c worry about non-BMP code points.
6943        return isWhitespaceImpl(codePoint);
6944    }
6945
6946    native static boolean isWhitespaceImpl(int codePoint);
6947
6948    /**
6949     * Determines if the specified character is an ISO control
6950     * character.  A character is considered to be an ISO control
6951     * character if its code is in the range {@code '\u005Cu0000'}
6952     * through {@code '\u005Cu001F'} or in the range
6953     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6954     *
6955     * <p><b>Note:</b> This method cannot handle <a
6956     * href="#supplementary"> supplementary characters</a>. To support
6957     * all Unicode characters, including supplementary characters, use
6958     * the {@link #isISOControl(int)} method.
6959     *
6960     * @param   ch      the character to be tested.
6961     * @return  {@code true} if the character is an ISO control character;
6962     *          {@code false} otherwise.
6963     *
6964     * @see     Character#isSpaceChar(char)
6965     * @see     Character#isWhitespace(char)
6966     * @since   1.1
6967     */
6968    public static boolean isISOControl(char ch) {
6969        return isISOControl((int)ch);
6970    }
6971
6972    /**
6973     * Determines if the referenced character (Unicode code point) is an ISO control
6974     * character.  A character is considered to be an ISO control
6975     * character if its code is in the range {@code '\u005Cu0000'}
6976     * through {@code '\u005Cu001F'} or in the range
6977     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6978     *
6979     * @param   codePoint the character (Unicode code point) to be tested.
6980     * @return  {@code true} if the character is an ISO control character;
6981     *          {@code false} otherwise.
6982     * @see     Character#isSpaceChar(int)
6983     * @see     Character#isWhitespace(int)
6984     * @since   1.5
6985     */
6986    public static boolean isISOControl(int codePoint) {
6987        // Optimized form of:
6988        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6989        //     (codePoint >= 0x7F && codePoint <= 0x9F);
6990        return codePoint <= 0x9F &&
6991            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6992    }
6993
6994    /**
6995     * Returns a value indicating a character's general category.
6996     *
6997     * <p><b>Note:</b> This method cannot handle <a
6998     * href="#supplementary"> supplementary characters</a>. To support
6999     * all Unicode characters, including supplementary characters, use
7000     * the {@link #getType(int)} method.
7001     *
7002     * @param   ch      the character to be tested.
7003     * @return  a value of type {@code int} representing the
7004     *          character's general category.
7005     * @see     Character#COMBINING_SPACING_MARK
7006     * @see     Character#CONNECTOR_PUNCTUATION
7007     * @see     Character#CONTROL
7008     * @see     Character#CURRENCY_SYMBOL
7009     * @see     Character#DASH_PUNCTUATION
7010     * @see     Character#DECIMAL_DIGIT_NUMBER
7011     * @see     Character#ENCLOSING_MARK
7012     * @see     Character#END_PUNCTUATION
7013     * @see     Character#FINAL_QUOTE_PUNCTUATION
7014     * @see     Character#FORMAT
7015     * @see     Character#INITIAL_QUOTE_PUNCTUATION
7016     * @see     Character#LETTER_NUMBER
7017     * @see     Character#LINE_SEPARATOR
7018     * @see     Character#LOWERCASE_LETTER
7019     * @see     Character#MATH_SYMBOL
7020     * @see     Character#MODIFIER_LETTER
7021     * @see     Character#MODIFIER_SYMBOL
7022     * @see     Character#NON_SPACING_MARK
7023     * @see     Character#OTHER_LETTER
7024     * @see     Character#OTHER_NUMBER
7025     * @see     Character#OTHER_PUNCTUATION
7026     * @see     Character#OTHER_SYMBOL
7027     * @see     Character#PARAGRAPH_SEPARATOR
7028     * @see     Character#PRIVATE_USE
7029     * @see     Character#SPACE_SEPARATOR
7030     * @see     Character#START_PUNCTUATION
7031     * @see     Character#SURROGATE
7032     * @see     Character#TITLECASE_LETTER
7033     * @see     Character#UNASSIGNED
7034     * @see     Character#UPPERCASE_LETTER
7035     * @since   1.1
7036     */
7037    public static int getType(char ch) {
7038        return getType((int)ch);
7039    }
7040
7041    /**
7042     * Returns a value indicating a character's general category.
7043     *
7044     * @param   codePoint the character (Unicode code point) to be tested.
7045     * @return  a value of type {@code int} representing the
7046     *          character's general category.
7047     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
7048     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
7049     * @see     Character#CONTROL CONTROL
7050     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
7051     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
7052     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
7053     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
7054     * @see     Character#END_PUNCTUATION END_PUNCTUATION
7055     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
7056     * @see     Character#FORMAT FORMAT
7057     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
7058     * @see     Character#LETTER_NUMBER LETTER_NUMBER
7059     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
7060     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
7061     * @see     Character#MATH_SYMBOL MATH_SYMBOL
7062     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
7063     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
7064     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
7065     * @see     Character#OTHER_LETTER OTHER_LETTER
7066     * @see     Character#OTHER_NUMBER OTHER_NUMBER
7067     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
7068     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
7069     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
7070     * @see     Character#PRIVATE_USE PRIVATE_USE
7071     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
7072     * @see     Character#START_PUNCTUATION START_PUNCTUATION
7073     * @see     Character#SURROGATE SURROGATE
7074     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
7075     * @see     Character#UNASSIGNED UNASSIGNED
7076     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
7077     * @since   1.5
7078     */
7079    public static int getType(int codePoint) {
7080        int type = getTypeImpl(codePoint);
7081        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
7082        if (type <= Character.FORMAT) {
7083            return type;
7084        }
7085        return (type + 1);
7086    }
7087
    // Native counterpart of getType(int). Its raw result is renumbered by that method,
    // whose comment says the values come from ICU and are not RI-compatible.
    static native int getTypeImpl(int codePoint);
7089
7090    /**
7091     * Determines the character representation for a specific digit in
7092     * the specified radix. If the value of {@code radix} is not a
7093     * valid radix, or the value of {@code digit} is not a valid
7094     * digit in the specified radix, the null character
7095     * ({@code '\u005Cu0000'}) is returned.
7096     * <p>
7097     * The {@code radix} argument is valid if it is greater than or
7098     * equal to {@code MIN_RADIX} and less than or equal to
7099     * {@code MAX_RADIX}. The {@code digit} argument is valid if
7100     * {@code 0 <= digit < radix}.
7101     * <p>
7102     * If the digit is less than 10, then
7103     * {@code '0' + digit} is returned. Otherwise, the value
7104     * {@code 'a' + digit - 10} is returned.
7105     *
7106     * @param   digit   the number to convert to a character.
7107     * @param   radix   the radix.
7108     * @return  the {@code char} representation of the specified digit
7109     *          in the specified radix.
7110     * @see     Character#MIN_RADIX
7111     * @see     Character#MAX_RADIX
7112     * @see     Character#digit(char, int)
7113     */
7114    public static char forDigit(int digit, int radix) {
7115        if ((digit >= radix) || (digit < 0)) {
7116            return '\0';
7117        }
7118        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
7119            return '\0';
7120        }
7121        if (digit < 10) {
7122            return (char)('0' + digit);
7123        }
7124        return (char)('a' - 10 + digit);
7125    }
7126
7127    /**
7128     * Returns the Unicode directionality property for the given
7129     * character.  Character directionality is used to calculate the
7130     * visual ordering of text. The directionality value of undefined
7131     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7132     *
7133     * <p><b>Note:</b> This method cannot handle <a
7134     * href="#supplementary"> supplementary characters</a>. To support
7135     * all Unicode characters, including supplementary characters, use
7136     * the {@link #getDirectionality(int)} method.
7137     *
7138     * @param  ch {@code char} for which the directionality property
7139     *            is requested.
7140     * @return the directionality property of the {@code char} value.
7141     *
7142     * @see Character#DIRECTIONALITY_UNDEFINED
7143     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7144     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7145     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7146     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7147     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7148     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7149     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7150     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7151     * @see Character#DIRECTIONALITY_NONSPACING_MARK
7152     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7153     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7154     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7155     * @see Character#DIRECTIONALITY_WHITESPACE
7156     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7157     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7158     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7159     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7160     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7161     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7162     * @since 1.4
7163     */
7164    public static byte getDirectionality(char ch) {
7165        return getDirectionality((int)ch);
7166    }
7167
7168    /**
7169     * Returns the Unicode directionality property for the given
7170     * character (Unicode code point).  Character directionality is
7171     * used to calculate the visual ordering of text. The
7172     * directionality value of an undefined character is {@link
7173     * #DIRECTIONALITY_UNDEFINED}.
7174     *
7175     * @param   codePoint the character (Unicode code point) for which
7176     *          the directionality property is requested.
7177     * @return the directionality property of the character.
7178     *
7179     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7180     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7181     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7182     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7183     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7184     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7185     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7186     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7187     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7188     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7189     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7190     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7191     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7192     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7193     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7194     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7195     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7196     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7197     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7198     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7199     * @since    1.5
7200     */
7201    public static byte getDirectionality(int codePoint) {
7202        if (getType(codePoint) == Character.UNASSIGNED) {
7203            return Character.DIRECTIONALITY_UNDEFINED;
7204        }
7205
7206        byte directionality = getDirectionalityImpl(codePoint);
7207        if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7208            return DIRECTIONALITY[directionality];
7209        }
7210        return Character.DIRECTIONALITY_UNDEFINED;
7211    }
7212
// Native lookup called by getDirectionality(int). The byte it returns is
// range-checked there and mapped through the DIRECTIONALITY table before
// being handed back to callers.
7213    native static byte getDirectionalityImpl(int codePoint);
7214    /**
7215     * Determines whether the character is mirrored according to the
7216     * Unicode specification.  Mirrored characters should have their
7217     * glyphs horizontally mirrored when displayed in text that is
7218     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7219     * PARENTHESIS is semantically defined to be an <i>opening
7220     * parenthesis</i>.  This will appear as a "(" in text that is
7221     * left-to-right but as a ")" in text that is right-to-left.
7222     *
7223     * <p><b>Note:</b> This method cannot handle <a
7224     * href="#supplementary"> supplementary characters</a>. To support
7225     * all Unicode characters, including supplementary characters, use
7226     * the {@link #isMirrored(int)} method.
7227     *
7228     * @param  ch {@code char} for which the mirrored property is requested
7229     * @return {@code true} if the char is mirrored, {@code false}
7230     *         if the {@code char} is not mirrored or is not defined.
7231     * @since 1.4
7232     */
7233    public static boolean isMirrored(char ch) {
7234        return isMirrored((int)ch);
7235    }
7236
7237    /**
7238     * Determines whether the specified character (Unicode code point)
7239     * is mirrored according to the Unicode specification.  Mirrored
7240     * characters should have their glyphs horizontally mirrored when
7241     * displayed in text that is right-to-left.  For example,
7242     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7243     * defined to be an <i>opening parenthesis</i>.  This will appear
7244     * as a "(" in text that is left-to-right but as a ")" in text
7245     * that is right-to-left.
7246     *
7247     * @param   codePoint the character (Unicode code point) to be tested.
7248     * @return  {@code true} if the character is mirrored, {@code false}
7249     *          if the character is not mirrored or is not defined.
7250     * @since   1.5
7251     */
7252    public static boolean isMirrored(int codePoint) {
7253        return isMirroredImpl(codePoint);
7254    }
7255
// Native lookup called by isMirrored(int), which returns its result unchanged.
7256    native static boolean isMirroredImpl(int codePoint);
7257    /**
7258     * Compares two {@code Character} objects numerically.
7259     *
7260     * @param   anotherCharacter   the {@code Character} to be compared.
7261     *
7262     * @return  the value {@code 0} if the argument {@code Character}
7263     *          is equal to this {@code Character}; a value less than
7264     *          {@code 0} if this {@code Character} is numerically less
7265     *          than the {@code Character} argument; and a value greater than
7266     *          {@code 0} if this {@code Character} is numerically greater
7267     *          than the {@code Character} argument (unsigned comparison).
7268     *          Note that this is strictly a numerical comparison; it is not
7269     *          locale-dependent.
7270     * @since   1.2
7271     */
7272    public int compareTo(Character anotherCharacter) {
7273        return compare(this.value, anotherCharacter.value);
7274    }
7275
7276    /**
7277     * Compares two {@code char} values numerically.
7278     * The value returned is identical to what would be returned by:
7279     * <pre>
7280     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7281     * </pre>
7282     *
7283     * @param  x the first {@code char} to compare
7284     * @param  y the second {@code char} to compare
7285     * @return the value {@code 0} if {@code x == y};
7286     *         a value less than {@code 0} if {@code x < y}; and
7287     *         a value greater than {@code 0} if {@code x > y}
7288     * @since 1.7
7289     */
7290    public static int compare(char x, char y) {
7291        return x - y;
7292    }
7293
7294    /**
7295     * The number of bits used to represent a {@code char} value in unsigned
7296     * binary form, constant {@code 16}.
7297     *
7298     * @since 1.5
7299     */
7300    public static final int SIZE = 16; // bit width of a char; BYTES below is derived from this value
7301
7302    /**
7303     * The number of bytes used to represent a {@code char} value in unsigned
7304     * binary form.
7305     *
7306     * @since 1.8
7307     */
7308    public static final int BYTES = SIZE / Byte.SIZE; // computed from SIZE rather than hard-coded, so the two constants stay in step
7309
7310    /**
7311     * Returns the value obtained by reversing the order of the bytes in the
7312     * specified {@code char} value.
7313     *
7314     * @param ch The {@code char} of which to reverse the byte order.
7315     * @return the value obtained by reversing (or, equivalently, swapping)
7316     *     the bytes in the specified {@code char} value.
7317     * @since 1.5
7318     */
7319    public static char reverseBytes(char ch) {
7320        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7321    }
7322
7323    /**
7324     * Returns the Unicode name of the specified character
7325     * {@code codePoint}, or null if the code point is
7326     * {@link #UNASSIGNED unassigned}.
7327     * <p>
7328     * Note: if the specified character is not assigned a name by
7329     * the <i>UnicodeData</i> file (part of the Unicode Character
7330     * Database maintained by the Unicode Consortium), the returned
7331     * name is the same as the result of the expression:
7332     *
7333     * <blockquote>{@code
7334     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7335     *     + " "
7336     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7337     *
7338     * }</blockquote>
7339     *
7340     * @param  codePoint the character (Unicode code point)
7341     *
7342     * @return the Unicode name of the specified character, or null if
7343     *         the code point is unassigned.
7344     *
7345     * @exception IllegalArgumentException if the specified
7346     *            {@code codePoint} is not a valid Unicode
7347     *            code point.
7348     *
7349     * @since 1.7
7350     */
7351    public static String getName(int codePoint) {
7352        if (!isValidCodePoint(codePoint)) {
7353            throw new IllegalArgumentException();
7354        }
7355        String name = getNameImpl(codePoint);
7356        if (name != null)
7357            return name;
7358        if (getType(codePoint) == UNASSIGNED)
7359            return null;
7360        UnicodeBlock block = UnicodeBlock.of(codePoint);
7361        if (block != null)
7362            return block.toString().replace('_', ' ') + " "
7363                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7364        // should never come here
7365        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7366    }
7367
// Native name lookup used by getName(int). When it returns null, getName
// either returns null (unassigned code point) or falls back to a name built
// from the code point's UnicodeBlock and hexadecimal value.
7368    private static native String getNameImpl(int codePoint);
7369}
7370